![](https://secure.gravatar.com/avatar/df8330968b6df8cd1c1942c5fb4b720c.jpg?s=120&d=mm&r=g)
commit 701f83bc54d5b4f1919603381694362c57bb001c Author: Juergen Daubert <jue@jue.li> Date: Wed Feb 9 11:53:46 2022 +0100 glibc: sync with upstream 2.23 branch diff --git a/glibc/.signature b/glibc/.signature index e5d5a599..dbf06195 100644 --- a/glibc/.signature +++ b/glibc/.signature @@ -1,10 +1,10 @@ untrusted comment: verify with /etc/ports/core.pub -RWRJc1FUaeVeqk2uwhHr5qBGXvnIwjOlb4TZ9ezJT2bi3Z+8Ym4D2uHdm5+VLuIIRBBmUYjvoKwBTb1m7j6tV9D+0QxRS+wJeA0= -SHA256 (Pkgfile) = d319ce6af6100d9570cdff2c31542c6404ffb7461283de1ae4d098d0853d5d5a +RWRJc1FUaeVeqjtuBDt9yWal+iJYK0LWLJghiZ1hQdV2j6Ll57N+c9XCp1EGfeMSTdC+Gvnan8BuI2jIRtKjih5DTvd+TEJDrw8= +SHA256 (Pkgfile) = bf6a5730d314fecf8a04f0029193a9c64eafabde1e8fc77a0860076d7eb37d7b SHA256 (.footprint) = aa29daaba0d990bb954964b7605cf132588bcb5ee9cf56d219f2ed26e60eba7b SHA256 (glibc-2.32.tar.xz) = 1627ea54f5a1a8467032563393e0901077626dc66f37f10ee6363bb722222836 SHA256 (linux-5.4.72.tar.xz) = 0e24645bd56fe5b55a7a662895f5562c103d71b54d097281f0c9c71ff22c1172 -SHA256 (glibc-2.32-6.patch) = 70f33f14f62cb2daddd9bbfe0ffdfb3bb01880f7cedbc71cb534e82343d4a3d4 +SHA256 (glibc-2.32-7.patch) = 765880b63e386bfab3898bbc273dbc683f737d6327289dcf968da2fe807575e4 SHA256 (hosts) = 5c02b256c105f1d4a12fb738d71c1bab9eb126533074d7a0c8a14b92670c9431 SHA256 (resolv.conf) = 72ccb58768a72a771ec37142bc361a18478a07ec9de6e925a20760794389bf51 SHA256 (nsswitch.conf) = 859b8984e5e90aff3cce8f9779996ae4033b280d2122840e9411e2f44a1c2e61 diff --git a/glibc/Pkgfile b/glibc/Pkgfile index 3c179148..a761b676 100644 --- a/glibc/Pkgfile +++ b/glibc/Pkgfile @@ -4,10 +4,10 @@ name=glibc version=2.32 -release=6 +release=7 source=(https://ftp.gnu.org/gnu/glibc/glibc-$version.tar.xz \ https://www.kernel.org/pub/linux/kernel/v5.x/linux-5.4.72.tar.xz \ - glibc-$version-6.patch \ + glibc-$version-7.patch \ hosts resolv.conf nsswitch.conf host.conf ld.so.conf) build() { @@ -18,7 +18,7 @@ build() { make INSTALL_HDR_PATH=$PKG/usr headers_install chown root:root $PKG/usr - patch -p1 -d 
$SRC/$name-${version:0:4} -i $SRC/$name-$version-6.patch + patch -p1 -d $SRC/$name-${version:0:4} -i $SRC/$name-$version-7.patch mkdir $SRC/build cd $SRC/build diff --git a/glibc/glibc-2.32-6.patch b/glibc/glibc-2.32-6.patch deleted file mode 100644 index 5d4a58d2..00000000 --- a/glibc/glibc-2.32-6.patch +++ /dev/null @@ -1,6671 +0,0 @@ -diff --git a/NEWS b/NEWS -index 485b8ddffa..b29826f4f5 100644 ---- a/NEWS -+++ b/NEWS -@@ -5,6 +5,27 @@ See the end for copying conditions. - Please send GNU C library bug reports via <https://sourceware.org/bugzilla/> - using `glibc' in the "product" field. - -+The following bugs are resolved with this release: -+ -+ [20019] NULL pointer dereference in libc.so.6 IFUNC due to uninitialized GOT -+ [26224] iconv hangs when converting some invalid inputs from several IBM -+ character sets (CVE-2020-27618) -+ [26534] libm.so 2.32 SIGILL in pow() due to FMA4 instruction on non-FMA4 -+ system -+ [26555] string: strerrorname_np does not return the documented value -+ [26600] Transaction ID collisions cause slow DNS lookups in getaddrinfo -+ [26636] libc: 32-bit shmctl(IPC_INFO) crashes when shminfo struct is -+ at the end of a memory mapping -+ [26637] libc: semctl SEM_STAT_ANY fails to pass the buffer specified -+ by the caller to the kernel -+ [26639] libc: msgctl IPC_INFO and MSG_INFO return garbage -+ [26853] aarch64: Missing unwind information in statically linked startup code -+ [26932] libc: sh: Multiple floating point functions defined as stubs only -+ [27130] "rep movsb" performance issue -+ [27177] GLIBC_TUNABLES=glibc.cpu.x86_ibt=on:glibc.cpu.x86_shstk=on doesn't work -+ [28524] Conversion from ISO-2022-JP-3 with iconv may emit spurious NULs -+ [28607] Masked signals are delivered on thread exit -+ - Version 2.32 - - Major new features: -@@ -185,6 +206,14 @@ Security related changes: - Dytrych of the Cisco Security Assessment and Penetration Team (See - TALOS-2020-1019). 
- -+ CVE-2020-27618: An infinite loop has been fixed in the iconv program when -+ invoked with input containing redundant shift sequences in the IBM1364, -+ IBM1371, IBM1388, IBM1390, or IBM1399 character sets. -+ -+ CVE-2021-33574: The mq_notify function has a potential use-after-free -+ issue when using a notification type of SIGEV_THREAD and a thread -+ attribute with a non-default affinity mask. -+ - The following bugs are resolved with this release: - - [9809] localedata: ckb_IQ: new Kurdish Sorani locale -diff --git a/Rules b/Rules -index 8b771f6095..beab969fde 100644 ---- a/Rules -+++ b/Rules -@@ -155,6 +155,7 @@ xtests: tests $(xtests-special) - else - tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \ - $(tests-container:%=$(objpfx)%.out) \ -+ $(tests-mcheck:%=$(objpfx)%-mcheck.out) \ - $(tests-special) $(tests-printers-out) - xtests: tests $(xtests:%=$(objpfx)%.out) $(xtests-special) - endif -@@ -165,7 +166,7 @@ ifeq ($(run-built-tests),no) - tests-expected = - else - tests-expected = $(tests) $(tests-internal) $(tests-printers) \ -- $(tests-container) -+ $(tests-container) $(tests-mcheck:%=%-mcheck) - endif - tests: - $(..)scripts/merge-test-results.sh -s $(objpfx) $(subdir) \ -@@ -191,6 +192,7 @@ else - binaries-pie-tests = - binaries-pie-notests = - endif -+binaries-mcheck-tests = $(tests-mcheck:%=%-mcheck) - else - binaries-all-notests = - binaries-all-tests = $(tests) $(tests-internal) $(xtests) $(test-srcs) -@@ -200,6 +202,7 @@ binaries-static-tests = - binaries-static = - binaries-pie-tests = - binaries-pie-notests = -+binaries-mcheck-tests = - endif - - binaries-pie = $(binaries-pie-tests) $(binaries-pie-notests) -@@ -223,6 +226,14 @@ $(addprefix $(objpfx),$(binaries-shared-tests)): %: %.o \ - $(+link-tests) - endif - -+ifneq "$(strip $(binaries-mcheck-tests))" "" -+$(addprefix $(objpfx),$(binaries-mcheck-tests)): %-mcheck: %.o \ -+ $(link-extra-libs-tests) \ -+ $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \ -+ 
$(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit) -+ $(+link-tests) -+endif -+ - ifneq "$(strip $(binaries-pie-tests))" "" - $(addprefix $(objpfx),$(binaries-pie-tests)): %: %.o \ - $(link-extra-libs-tests) \ -@@ -253,6 +264,12 @@ $(addprefix $(objpfx),$(binaries-static-tests)): %: %.o \ - $(+link-static-tests) - endif - -+# All mcheck tests will be run with MALLOC_CHECK_=3 -+define mcheck-ENVS -+$(1)-mcheck-ENV = MALLOC_CHECK_=3 -+endef -+$(foreach t,$(tests-mcheck),$(eval $(call mcheck-ENVS,$(t)))) -+ - ifneq "$(strip $(tests) $(tests-internal) $(xtests) $(test-srcs))" "" - # These are the implicit rules for making test outputs - # from the test programs and whatever input files are present. -diff --git a/debug/Makefile b/debug/Makefile -index 3a60d7af7a..0036edd187 100644 ---- a/debug/Makefile -+++ b/debug/Makefile -@@ -51,7 +51,7 @@ routines = backtrace backtracesyms backtracesymsfd noophooks \ - explicit_bzero_chk \ - stack_chk_fail fortify_fail \ - $(static-only-routines) --static-only-routines := warning-nop stack_chk_fail_local -+static-only-routines := stack_chk_fail_local - - # Don't add stack_chk_fail_local.o to libc.a since __stack_chk_fail_local - # is an alias of __stack_chk_fail in stack_chk_fail.o. -diff --git a/debug/warning-nop.c b/debug/warning-nop.c -deleted file mode 100644 -index 4ab7e182b7..0000000000 ---- a/debug/warning-nop.c -+++ /dev/null -@@ -1,70 +0,0 @@ --/* Dummy nop functions to elicit link-time warnings. -- Copyright (C) 2005-2020 Free Software Foundation, Inc. -- This file is part of the GNU C Library. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. 
-- -- In addition to the permissions in the GNU Lesser General Public -- License, the Free Software Foundation gives you unlimited -- permission to link the compiled version of this file with other -- programs, and to distribute those programs without any restriction -- coming from the use of this file. (The GNU Lesser General Public -- License restrictions do apply in other respects; for example, they -- cover modification of the file, and distribution when not linked -- into another program.) -- -- Note that people who make modified versions of this file are not -- obligated to grant this special exception for their modified -- versions; it is their choice whether to do so. The GNU Lesser -- General Public License gives permission to release a modified -- version without this exception; this exception also makes it -- possible to release a modified version which carries forward this -- exception. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, see -- <https://www.gnu.org/licenses/>. */ -- --#include <sys/cdefs.h> -- --static void --__attribute__ ((used)) --nop (void) --{ --} -- --/* Don't insert any other #include's before this #undef! */ -- --#undef __warndecl --#define __warndecl(name, msg) \ -- extern void name (void) __attribute__ ((alias ("nop"))) attribute_hidden; \ -- link_warning (name, msg) -- --#undef __USE_FORTIFY_LEVEL --#define __USE_FORTIFY_LEVEL 99 -- --/* Following here we need an #include for each public header file -- that uses __warndecl. */ -- --/* Define away to avoid warnings with compilers that do not have these -- builtins. 
*/ --#define __builtin___memcpy_chk(dest, src, len, bos) NULL --#define __builtin___memmove_chk(dest, src, len, bos) NULL --#define __builtin___mempcpy_chk(dest, src, len, bos) NULL --#define __builtin___memset_chk(dest, ch, len, bos) NULL --#define __builtin___stpcpy_chk(dest, src, bos) NULL --#define __builtin___strcat_chk(dest, src, bos) NULL --#define __builtin___strcpy_chk(dest, src, bos) NULL --#define __builtin___strncat_chk(dest, src, len, bos) NULL --#define __builtin___strncpy_chk(dest, src, len, bos) NULL --#define __builtin_object_size(bos, level) 0 -- --#include <string.h> -diff --git a/elf/Makefile b/elf/Makefile -index 0b78721848..3ba7f4ecfc 100644 ---- a/elf/Makefile -+++ b/elf/Makefile -@@ -1381,6 +1381,8 @@ CFLAGS-ifuncmain7pie.c += $(pie-ccflag) - CFLAGS-ifuncmain9pie.c += $(pie-ccflag) - CFLAGS-tst-ifunc-textrel.c += $(pic-ccflag) - -+LDFLAGS-ifuncmain6pie = -Wl,-z,lazy -+ - $(objpfx)ifuncmain1pie: $(objpfx)ifuncmod1.so - $(objpfx)ifuncmain1staticpie: $(objpfx)ifuncdep1pic.o - $(objpfx)ifuncmain1vispie: $(objpfx)ifuncmod1.so -@@ -1630,8 +1632,6 @@ $(objpfx)tst-nodelete-dlclose.out: $(objpfx)tst-nodelete-dlclose-dso.so \ - - tst-env-setuid-ENV = MALLOC_CHECK_=2 MALLOC_MMAP_THRESHOLD_=4096 \ - LD_HWCAP_MASK=0x1 --tst-env-setuid-tunables-ENV = \ -- GLIBC_TUNABLES=glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096 - - $(objpfx)tst-debug1: $(libdl) - $(objpfx)tst-debug1.out: $(objpfx)tst-debug1mod1.so -diff --git a/elf/dl-load.c b/elf/dl-load.c -index e39980fb19..71867e7c1a 100644 ---- a/elf/dl-load.c -+++ b/elf/dl-load.c -@@ -855,10 +855,12 @@ lose (int code, int fd, const char *name, char *realname, struct link_map *l, - - /* Process PT_GNU_PROPERTY program header PH in module L after - PT_LOAD segments are mapped. Only one NT_GNU_PROPERTY_TYPE_0 -- note is handled which contains processor specific properties. */ -+ note is handled which contains processor specific properties. 
-+ FD is -1 for the kernel mapped main executable otherwise it is -+ the fd used for loading module L. */ - - void --_dl_process_pt_gnu_property (struct link_map *l, const ElfW(Phdr) *ph) -+_dl_process_pt_gnu_property (struct link_map *l, int fd, const ElfW(Phdr) *ph) - { - const ElfW(Nhdr) *note = (const void *) (ph->p_vaddr + l->l_addr); - const ElfW(Addr) size = ph->p_memsz; -@@ -905,7 +907,7 @@ _dl_process_pt_gnu_property (struct link_map *l, const ElfW(Phdr) *ph) - last_type = type; - - /* Target specific property processing. */ -- if (_dl_process_gnu_property (l, type, datasz, ptr) == 0) -+ if (_dl_process_gnu_property (l, fd, type, datasz, ptr) == 0) - return; - - /* Check the next property item. */ -@@ -1251,21 +1253,6 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd, - maplength, has_holes, loader); - if (__glibc_unlikely (errstring != NULL)) - goto call_lose; -- -- /* Process program headers again after load segments are mapped in -- case processing requires accessing those segments. Scan program -- headers backward so that PT_NOTE can be skipped if PT_GNU_PROPERTY -- exits. */ -- for (ph = &phdr[l->l_phnum]; ph != phdr; --ph) -- switch (ph[-1].p_type) -- { -- case PT_NOTE: -- _dl_process_pt_note (l, &ph[-1]); -- break; -- case PT_GNU_PROPERTY: -- _dl_process_pt_gnu_property (l, &ph[-1]); -- break; -- } - } - - if (l->l_ld == 0) -@@ -1377,6 +1364,21 @@ cannot enable executable stack as shared object requires"); - if (l->l_tls_initimage != NULL) - l->l_tls_initimage = (char *) l->l_tls_initimage + l->l_addr; - -+ /* Process program headers again after load segments are mapped in -+ case processing requires accessing those segments. Scan program -+ headers backward so that PT_NOTE can be skipped if PT_GNU_PROPERTY -+ exits. 
*/ -+ for (ph = &l->l_phdr[l->l_phnum]; ph != l->l_phdr; --ph) -+ switch (ph[-1].p_type) -+ { -+ case PT_NOTE: -+ _dl_process_pt_note (l, fd, &ph[-1]); -+ break; -+ case PT_GNU_PROPERTY: -+ _dl_process_pt_gnu_property (l, fd, &ph[-1]); -+ break; -+ } -+ - /* We are done mapping in the file. We no longer need the descriptor. */ - if (__glibc_unlikely (__close_nocancel (fd) != 0)) - { -diff --git a/elf/dl-open.c b/elf/dl-open.c -index 8769e47051..55b39e1bbe 100644 ---- a/elf/dl-open.c -+++ b/elf/dl-open.c -@@ -887,7 +887,7 @@ no more namespaces available for dlmopen()")); - /* Avoid keeping around a dangling reference to the libc.so link - map in case it has been cached in libc_map. */ - if (!args.libc_already_loaded) -- GL(dl_ns)[nsid].libc_map = NULL; -+ GL(dl_ns)[args.nsid].libc_map = NULL; - - /* Remove the object from memory. It may be in an inconsistent - state if relocation failed, for example. */ -diff --git a/elf/dl-tunables.c b/elf/dl-tunables.c -index 26e6e26612..15b29bcb90 100644 ---- a/elf/dl-tunables.c -+++ b/elf/dl-tunables.c -@@ -177,6 +177,7 @@ parse_tunables (char *tunestr, char *valstring) - return; - - char *p = tunestr; -+ size_t off = 0; - - while (true) - { -@@ -190,7 +191,11 @@ parse_tunables (char *tunestr, char *valstring) - /* If we reach the end of the string before getting a valid name-value - pair, bail out. */ - if (p[len] == '\0') -- return; -+ { -+ if (__libc_enable_secure) -+ tunestr[off] = '\0'; -+ return; -+ } - - /* We did not find a valid name-value pair before encountering the - colon. */ -@@ -216,35 +221,28 @@ parse_tunables (char *tunestr, char *valstring) - - if (tunable_is_name (cur->name, name)) - { -- /* If we are in a secure context (AT_SECURE) then ignore the tunable -- unless it is explicitly marked as secure. Tunable values take -- precedence over their envvar aliases. */ -+ /* If we are in a secure context (AT_SECURE) then ignore the -+ tunable unless it is explicitly marked as secure. 
Tunable -+ values take precedence over their envvar aliases. We write -+ the tunables that are not SXID_ERASE back to TUNESTR, thus -+ dropping all SXID_ERASE tunables and any invalid or -+ unrecognized tunables. */ - if (__libc_enable_secure) - { -- if (cur->security_level == TUNABLE_SECLEVEL_SXID_ERASE) -+ if (cur->security_level != TUNABLE_SECLEVEL_SXID_ERASE) - { -- if (p[len] == '\0') -- { -- /* Last tunable in the valstring. Null-terminate and -- return. */ -- *name = '\0'; -- return; -- } -- else -- { -- /* Remove the current tunable from the string. We do -- this by overwriting the string starting from NAME -- (which is where the current tunable begins) with -- the remainder of the string. We then have P point -- to NAME so that we continue in the correct -- position in the valstring. */ -- char *q = &p[len + 1]; -- p = name; -- while (*q != '\0') -- *name++ = *q++; -- name[0] = '\0'; -- len = 0; -- } -+ if (off > 0) -+ tunestr[off++] = ':'; -+ -+ const char *n = cur->name; -+ -+ while (*n != '\0') -+ tunestr[off++] = *n++; -+ -+ tunestr[off++] = '='; -+ -+ for (size_t j = 0; j < len; j++) -+ tunestr[off++] = value[j]; - } - - if (cur->security_level != TUNABLE_SECLEVEL_NONE) -@@ -257,9 +255,7 @@ parse_tunables (char *tunestr, char *valstring) - } - } - -- if (p[len] == '\0') -- return; -- else -+ if (p[len] != '\0') - p += len + 1; - } - } -diff --git a/elf/ifuncmain6pie.c b/elf/ifuncmain6pie.c -index 04faeb86ef..4a01906836 100644 ---- a/elf/ifuncmain6pie.c -+++ b/elf/ifuncmain6pie.c -@@ -9,7 +9,6 @@ - #include "ifunc-sel.h" - - typedef int (*foo_p) (void); --extern foo_p foo_ptr; - - static int - one (void) -@@ -28,20 +27,17 @@ foo_ifunc (void) - } - - extern int foo (void); --extern foo_p get_foo (void); -+extern int call_foo (void); - extern foo_p get_foo_p (void); - --foo_p my_foo_ptr = foo; -+foo_p foo_ptr = foo; - - int - main (void) - { - foo_p p; - -- p = get_foo (); -- if (p != foo) -- abort (); -- if ((*p) () != -30) -+ if (call_foo () != -30) - 
abort (); - - p = get_foo_p (); -@@ -52,12 +48,8 @@ main (void) - - if (foo_ptr != foo) - abort (); -- if (my_foo_ptr != foo) -- abort (); - if ((*foo_ptr) () != -30) - abort (); -- if ((*my_foo_ptr) () != -30) -- abort (); - if (foo () != -30) - abort (); - -diff --git a/elf/ifuncmod6.c b/elf/ifuncmod6.c -index 2e16c1d06d..2f6d0715e6 100644 ---- a/elf/ifuncmod6.c -+++ b/elf/ifuncmod6.c -@@ -4,7 +4,7 @@ extern int foo (void); - - typedef int (*foo_p) (void); - --foo_p foo_ptr = foo; -+extern foo_p foo_ptr; - - foo_p - get_foo_p (void) -@@ -12,8 +12,8 @@ get_foo_p (void) - return foo_ptr; - } - --foo_p --get_foo (void) -+int -+call_foo (void) - { -- return foo; -+ return foo (); - } -diff --git a/elf/rtld.c b/elf/rtld.c -index 5b882163fa..14a42ed00a 100644 ---- a/elf/rtld.c -+++ b/elf/rtld.c -@@ -1534,10 +1534,10 @@ of this helper program; chances are you did not intend to run this program.\n\ - switch (ph[-1].p_type) - { - case PT_NOTE: -- _dl_process_pt_note (main_map, &ph[-1]); -+ _dl_process_pt_note (main_map, -1, &ph[-1]); - break; - case PT_GNU_PROPERTY: -- _dl_process_pt_gnu_property (main_map, &ph[-1]); -+ _dl_process_pt_gnu_property (main_map, -1, &ph[-1]); - break; - } - -diff --git a/elf/tst-env-setuid-tunables.c b/elf/tst-env-setuid-tunables.c -index 971d5892b1..ca0c8c245c 100644 ---- a/elf/tst-env-setuid-tunables.c -+++ b/elf/tst-env-setuid-tunables.c -@@ -25,35 +25,76 @@ - #include "config.h" - #undef _LIBC - --#define test_parent test_parent_tunables --#define test_child test_child_tunables -- --static int test_child_tunables (void); --static int test_parent_tunables (void); -- --#include "tst-env-setuid.c" -- --#define CHILD_VALSTRING_VALUE "glibc.malloc.mmap_threshold=4096" --#define PARENT_VALSTRING_VALUE \ -- "glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096" -+#include <errno.h> -+#include <fcntl.h> -+#include <stdlib.h> -+#include <stdint.h> -+#include <stdio.h> -+#include <string.h> -+#include <sys/stat.h> -+#include <sys/wait.h> 
-+#include <unistd.h> -+#include <intprops.h> -+#include <array_length.h> -+ -+#include <support/check.h> -+#include <support/support.h> -+#include <support/test-driver.h> -+#include <support/capture_subprocess.h> -+ -+const char *teststrings[] = -+{ -+ "glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096", -+ "glibc.malloc.check=2:glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096", -+ "glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096:glibc.malloc.check=2", -+ "glibc.malloc.perturb=0x800", -+ "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096", -+ "glibc.malloc.perturb=0x800:not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096", -+ "glibc.not_valid.check=2:glibc.malloc.mmap_threshold=4096", -+ "not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096", -+ "glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096:glibc.malloc.check=2", -+ "glibc.malloc.check=4:glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096", -+ ":glibc.malloc.garbage=2:glibc.malloc.check=1", -+ "glibc.malloc.check=1:glibc.malloc.check=2", -+ "not_valid.malloc.check=2", -+ "glibc.not_valid.check=2", -+}; -+ -+const char *resultstrings[] = -+{ -+ "glibc.malloc.mmap_threshold=4096", -+ "glibc.malloc.mmap_threshold=4096", -+ "glibc.malloc.mmap_threshold=4096", -+ "glibc.malloc.perturb=0x800", -+ "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096", -+ "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096", -+ "glibc.malloc.mmap_threshold=4096", -+ "glibc.malloc.mmap_threshold=4096", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+}; - - static int --test_child_tunables (void) -+test_child (int off) - { - const char *val = getenv ("GLIBC_TUNABLES"); - - #if HAVE_TUNABLES -- if (val != NULL && strcmp (val, CHILD_VALSTRING_VALUE) == 0) -+ if (val != NULL && strcmp (val, resultstrings[off]) == 0) - return 0; - - if (val != NULL) -- printf ("Unexpected GLIBC_TUNABLES VALUE %s\n", val); -+ printf ("[%d] Unexpected GLIBC_TUNABLES VALUE %s\n", off, val); - - return 1; 
- #else - if (val != NULL) - { -- printf ("GLIBC_TUNABLES not cleared\n"); -+ printf ("[%d] GLIBC_TUNABLES not cleared\n", off); - return 1; - } - return 0; -@@ -61,15 +102,48 @@ test_child_tunables (void) - } - - static int --test_parent_tunables (void) -+do_test (int argc, char **argv) - { -- const char *val = getenv ("GLIBC_TUNABLES"); -+ /* Setgid child process. */ -+ if (argc == 2) -+ { -+ if (getgid () == getegid ()) -+ /* This can happen if the file system is mounted nosuid. */ -+ FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n", -+ (intmax_t) getgid ()); - -- if (val != NULL && strcmp (val, PARENT_VALSTRING_VALUE) == 0) -- return 0; -+ int ret = test_child (atoi (argv[1])); - -- if (val != NULL) -- printf ("Unexpected GLIBC_TUNABLES VALUE %s\n", val); -+ if (ret != 0) -+ exit (1); - -- return 1; -+ exit (EXIT_SUCCESS); -+ } -+ else -+ { -+ int ret = 0; -+ -+ /* Spawn tests. */ -+ for (int i = 0; i < array_length (teststrings); i++) -+ { -+ char buf[INT_BUFSIZE_BOUND (int)]; -+ -+ printf ("Spawned test for %s (%d)\n", teststrings[i], i); -+ snprintf (buf, sizeof (buf), "%d\n", i); -+ if (setenv ("GLIBC_TUNABLES", teststrings[i], 1) != 0) -+ exit (1); -+ -+ int status = support_capture_subprogram_self_sgid (buf); -+ -+ /* Bail out early if unsupported. 
*/ -+ if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) -+ return EXIT_UNSUPPORTED; -+ -+ ret |= status; -+ } -+ return ret; -+ } - } -+ -+#define TEST_FUNCTION_ARGV do_test -+#include <support/test-driver.c> -diff --git a/elf/tst-env-setuid.c b/elf/tst-env-setuid.c -index 41dc79e83a..2dbccdb69e 100644 ---- a/elf/tst-env-setuid.c -+++ b/elf/tst-env-setuid.c -@@ -29,173 +29,12 @@ - #include <sys/wait.h> - #include <unistd.h> - -+#include <support/check.h> - #include <support/support.h> - #include <support/test-driver.h> -+#include <support/capture_subprocess.h> - - static char SETGID_CHILD[] = "setgid-child"; --#define CHILD_STATUS 42 -- --/* Return a GID which is not our current GID, but is present in the -- supplementary group list. */ --static gid_t --choose_gid (void) --{ -- const int count = 64; -- gid_t groups[count]; -- int ret = getgroups (count, groups); -- if (ret < 0) -- { -- printf ("getgroups: %m\n"); -- exit (1); -- } -- gid_t current = getgid (); -- for (int i = 0; i < ret; ++i) -- { -- if (groups[i] != current) -- return groups[i]; -- } -- return 0; --} -- --/* Spawn and execute a program and verify that it returns the CHILD_STATUS. */ --static pid_t --do_execve (char **args) --{ -- pid_t kid = vfork (); -- -- if (kid < 0) -- { -- printf ("vfork: %m\n"); -- return -1; -- } -- -- if (kid == 0) -- { -- /* Child process. */ -- execve (args[0], args, environ); -- _exit (-errno); -- } -- -- if (kid < 0) -- return 1; -- -- int status; -- -- if (waitpid (kid, &status, 0) < 0) -- { -- printf ("waitpid: %m\n"); -- return 1; -- } -- -- if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) -- return EXIT_UNSUPPORTED; -- -- if (!WIFEXITED (status) || WEXITSTATUS (status) != CHILD_STATUS) -- { -- printf ("Unexpected exit status %d from child process\n", -- WEXITSTATUS (status)); -- return 1; -- } -- return 0; --} -- --/* Copies the executable into a restricted directory, so that we can -- safely make it SGID with the TARGET group ID. Then runs the -- executable. 
*/ --static int --run_executable_sgid (gid_t target) --{ -- char *dirname = xasprintf ("%s/tst-tunables-setuid.%jd", -- test_dir, (intmax_t) getpid ()); -- char *execname = xasprintf ("%s/bin", dirname); -- int infd = -1; -- int outfd = -1; -- int ret = 0; -- if (mkdir (dirname, 0700) < 0) -- { -- printf ("mkdir: %m\n"); -- goto err; -- } -- infd = open ("/proc/self/exe", O_RDONLY); -- if (infd < 0) -- { -- printf ("open (/proc/self/exe): %m\n"); -- goto err; -- } -- outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700); -- if (outfd < 0) -- { -- printf ("open (%s): %m\n", execname); -- goto err; -- } -- char buf[4096]; -- for (;;) -- { -- ssize_t rdcount = read (infd, buf, sizeof (buf)); -- if (rdcount < 0) -- { -- printf ("read: %m\n"); -- goto err; -- } -- if (rdcount == 0) -- break; -- char *p = buf; -- char *end = buf + rdcount; -- while (p != end) -- { -- ssize_t wrcount = write (outfd, buf, end - p); -- if (wrcount == 0) -- errno = ENOSPC; -- if (wrcount <= 0) -- { -- printf ("write: %m\n"); -- goto err; -- } -- p += wrcount; -- } -- } -- if (fchown (outfd, getuid (), target) < 0) -- { -- printf ("fchown (%s): %m\n", execname); -- goto err; -- } -- if (fchmod (outfd, 02750) < 0) -- { -- printf ("fchmod (%s): %m\n", execname); -- goto err; -- } -- if (close (outfd) < 0) -- { -- printf ("close (outfd): %m\n"); -- goto err; -- } -- if (close (infd) < 0) -- { -- printf ("close (infd): %m\n"); -- goto err; -- } -- -- char *args[] = {execname, SETGID_CHILD, NULL}; -- -- ret = do_execve (args); -- --err: -- if (outfd >= 0) -- close (outfd); -- if (infd >= 0) -- close (infd); -- if (execname) -- { -- unlink (execname); -- free (execname); -- } -- if (dirname) -- { -- rmdir (dirname); -- free (dirname); -- } -- return ret; --} - - #ifndef test_child - static int -@@ -256,40 +95,32 @@ do_test (int argc, char **argv) - if (argc == 2 && strcmp (argv[1], SETGID_CHILD) == 0) - { - if (getgid () == getegid ()) -- { -- /* This can happen if the file system is mounted 
nosuid. */ -- fprintf (stderr, "SGID failed: GID and EGID match (%jd)\n", -- (intmax_t) getgid ()); -- exit (EXIT_UNSUPPORTED); -- } -+ /* This can happen if the file system is mounted nosuid. */ -+ FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n", -+ (intmax_t) getgid ()); - - int ret = test_child (); - - if (ret != 0) - exit (1); - -- exit (CHILD_STATUS); -+ exit (EXIT_SUCCESS); - } - else - { - if (test_parent () != 0) - exit (1); - -- /* Try running a setgid program. */ -- gid_t target = choose_gid (); -- if (target == 0) -- { -- fprintf (stderr, -- "Could not find a suitable GID for user %jd, skipping test\n", -- (intmax_t) getuid ()); -- exit (0); -- } -+ int status = support_capture_subprogram_self_sgid (SETGID_CHILD); - -- return run_executable_sgid (target); -- } -+ if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) -+ return EXIT_UNSUPPORTED; -+ -+ if (!WIFEXITED (status)) -+ FAIL_EXIT1 ("Unexpected exit status %d from child process\n", status); - -- /* Something went wrong and our argv was corrupted. 
*/ -- _exit (1); -+ return 0; -+ } - } - - #define TEST_FUNCTION_ARGV do_test -diff --git a/iconv/Versions b/iconv/Versions -index 8a5f4cf780..d51af52fa3 100644 ---- a/iconv/Versions -+++ b/iconv/Versions -@@ -6,7 +6,9 @@ libc { - GLIBC_PRIVATE { - # functions shared with iconv program - __gconv_get_alias_db; __gconv_get_cache; __gconv_get_modules_db; -- __gconv_open; __gconv_create_spec; -+ -+ # functions used elsewhere in glibc -+ __gconv_open; __gconv_create_spec; __gconv_destroy_spec; - - # function used by the gconv modules - __gconv_transliterate; -diff --git a/iconv/gconv_charset.c b/iconv/gconv_charset.c -index 6ccd0773cc..4ba0aa99f5 100644 ---- a/iconv/gconv_charset.c -+++ b/iconv/gconv_charset.c -@@ -216,3 +216,13 @@ out: - return ret; - } - libc_hidden_def (__gconv_create_spec) -+ -+ -+void -+__gconv_destroy_spec (struct gconv_spec *conv_spec) -+{ -+ free (conv_spec->fromcode); -+ free (conv_spec->tocode); -+ return; -+} -+libc_hidden_def (__gconv_destroy_spec) -diff --git a/iconv/gconv_charset.h b/iconv/gconv_charset.h -index b39b09aea1..e9c122cf7e 100644 ---- a/iconv/gconv_charset.h -+++ b/iconv/gconv_charset.h -@@ -48,33 +48,6 @@ - #define GCONV_IGNORE_ERRORS_SUFFIX "IGNORE" - - --/* This function accepts the charset names of the source and destination of the -- conversion and populates *conv_spec with an equivalent conversion -- specification that may later be used by __gconv_open. The charset names -- might contain options in the form of suffixes that alter the conversion, -- e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring -- and truncating any suffix options in fromcode, and processing and truncating -- any suffix options in tocode. Supported suffix options ("TRANSLIT" or -- "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec -- to be set to true. Unrecognized suffix options are silently discarded. If -- the function succeeds, it returns conv_spec back to the caller. It returns -- NULL upon failure. 
*/ --struct gconv_spec * --__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode, -- const char *tocode); --libc_hidden_proto (__gconv_create_spec) -- -- --/* This function frees all heap memory allocated by __gconv_create_spec. */ --static void __attribute__ ((unused)) --gconv_destroy_spec (struct gconv_spec *conv_spec) --{ -- free (conv_spec->fromcode); -- free (conv_spec->tocode); -- return; --} -- -- - /* This function copies in-order, characters from the source 's' that are - either alpha-numeric or one in one of these: "_-.,:/" - into the destination - 'wp' while dropping all other characters. In the process, it converts all -diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h -index e86938dae7..f721ce30ff 100644 ---- a/iconv/gconv_int.h -+++ b/iconv/gconv_int.h -@@ -152,6 +152,27 @@ extern int __gconv_open (struct gconv_spec *conv_spec, - __gconv_t *handle, int flags); - libc_hidden_proto (__gconv_open) - -+/* This function accepts the charset names of the source and destination of the -+ conversion and populates *conv_spec with an equivalent conversion -+ specification that may later be used by __gconv_open. The charset names -+ might contain options in the form of suffixes that alter the conversion, -+ e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring -+ and truncating any suffix options in fromcode, and processing and truncating -+ any suffix options in tocode. Supported suffix options ("TRANSLIT" or -+ "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec -+ to be set to true. Unrecognized suffix options are silently discarded. If -+ the function succeeds, it returns conv_spec back to the caller. It returns -+ NULL upon failure. */ -+extern struct gconv_spec * -+__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode, -+ const char *tocode); -+libc_hidden_proto (__gconv_create_spec) -+ -+/* This function frees all heap memory allocated by __gconv_create_spec. 
*/ -+extern void -+__gconv_destroy_spec (struct gconv_spec *conv_spec); -+libc_hidden_proto (__gconv_destroy_spec) -+ - /* Free resources associated with transformation descriptor CD. */ - extern int __gconv_close (__gconv_t cd) - attribute_hidden; -diff --git a/iconv/iconv_open.c b/iconv/iconv_open.c -index dd54bc12e0..5b30055c04 100644 ---- a/iconv/iconv_open.c -+++ b/iconv/iconv_open.c -@@ -39,7 +39,7 @@ iconv_open (const char *tocode, const char *fromcode) - - int res = __gconv_open (&conv_spec, &cd, 0); - -- gconv_destroy_spec (&conv_spec); -+ __gconv_destroy_spec (&conv_spec); - - if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) - { -diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c -index b4334faa57..d59979759c 100644 ---- a/iconv/iconv_prog.c -+++ b/iconv/iconv_prog.c -@@ -184,7 +184,7 @@ main (int argc, char *argv[]) - /* Let's see whether we have these coded character sets. */ - res = __gconv_open (&conv_spec, &cd, 0); - -- gconv_destroy_spec (&conv_spec); -+ __gconv_destroy_spec (&conv_spec); - - if (res != __GCONV_OK) - { -diff --git a/iconv/tst-iconv_prog.sh b/iconv/tst-iconv_prog.sh -index 8298136b7f..d8db7b335c 100644 ---- a/iconv/tst-iconv_prog.sh -+++ b/iconv/tst-iconv_prog.sh -@@ -102,12 +102,16 @@ hangarray=( - "\x00\x80;-c;IBM1161;UTF-8//TRANSLIT//IGNORE" - "\x00\xdb;-c;IBM1162;UTF-8//TRANSLIT//IGNORE" - "\x00\x70;-c;IBM12712;UTF-8//TRANSLIT//IGNORE" --# These are known hangs that are yet to be fixed: --# "\x00\x0f;-c;IBM1364;UTF-8" --# "\x00\x0f;-c;IBM1371;UTF-8" --# "\x00\x0f;-c;IBM1388;UTF-8" --# "\x00\x0f;-c;IBM1390;UTF-8" --# "\x00\x0f;-c;IBM1399;UTF-8" -+"\x00\x0f;-c;IBM1364;UTF-8" -+"\x0e\x0e;-c;IBM1364;UTF-8" -+"\x00\x0f;-c;IBM1371;UTF-8" -+"\x0e\x0e;-c;IBM1371;UTF-8" -+"\x00\x0f;-c;IBM1388;UTF-8" -+"\x0e\x0e;-c;IBM1388;UTF-8" -+"\x00\x0f;-c;IBM1390;UTF-8" -+"\x0e\x0e;-c;IBM1390;UTF-8" -+"\x00\x0f;-c;IBM1399;UTF-8" -+"\x0e\x0e;-c;IBM1399;UTF-8" - "\x00\x53;-c;IBM16804;UTF-8//TRANSLIT//IGNORE" - 
"\x00\x41;-c;IBM274;UTF-8//TRANSLIT//IGNORE" - "\x00\x41;-c;IBM275;UTF-8//TRANSLIT//IGNORE" -diff --git a/iconvdata/Makefile b/iconvdata/Makefile -index 4ec2741cdc..b67b4feeb4 100644 ---- a/iconvdata/Makefile -+++ b/iconvdata/Makefile -@@ -1,4 +1,5 @@ - # Copyright (C) 1997-2020 Free Software Foundation, Inc. -+# Copyright (C) The GNU Toolchain Authors. - # This file is part of the GNU C Library. - - # The GNU C Library is free software; you can redistribute it and/or -@@ -73,7 +74,8 @@ modules.so := $(addsuffix .so, $(modules)) - ifeq (yes,$(build-shared)) - tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \ - tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \ -- bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 -+ bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \ -+ bug-iconv13 bug-iconv14 bug-iconv15 - ifeq ($(have-thread-library),yes) - tests += bug-iconv3 - endif -@@ -321,6 +323,10 @@ $(objpfx)bug-iconv10.out: $(objpfx)gconv-modules \ - $(addprefix $(objpfx),$(modules.so)) - $(objpfx)bug-iconv12.out: $(objpfx)gconv-modules \ - $(addprefix $(objpfx),$(modules.so)) -+$(objpfx)bug-iconv14.out: $(objpfx)gconv-modules \ -+ $(addprefix $(objpfx),$(modules.so)) -+$(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \ -+ $(addprefix $(objpfx),$(modules.so)) - - $(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \ - $(addprefix $(objpfx),$(modules.so)) \ -diff --git a/iconvdata/bug-iconv13.c b/iconvdata/bug-iconv13.c -new file mode 100644 -index 0000000000..87aaff398e ---- /dev/null -+++ b/iconvdata/bug-iconv13.c -@@ -0,0 +1,53 @@ -+/* bug 24973: Test EUC-KR module -+ Copyright (C) 2020 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. 
-+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ <https://www.gnu.org/licenses/>. */ -+ -+#include <errno.h> -+#include <iconv.h> -+#include <stdio.h> -+#include <support/check.h> -+ -+static int -+do_test (void) -+{ -+ iconv_t cd = iconv_open ("UTF-8//IGNORE", "EUC-KR"); -+ TEST_VERIFY_EXIT (cd != (iconv_t) -1); -+ -+ /* 0xfe (->0x7e : row 94) and 0xc9 (->0x49 : row 41) are user-defined -+ areas, which are not allowed and should be skipped over due to -+ //IGNORE. The trailing 0xfe also is an incomplete sequence, which -+ should be checked first. */ -+ char input[4] = { '\xc9', '\xa1', '\0', '\xfe' }; -+ char *inptr = input; -+ size_t insize = sizeof (input); -+ char output[4]; -+ char *outptr = output; -+ size_t outsize = sizeof (output); -+ -+ /* This used to crash due to buffer overrun. */ -+ TEST_VERIFY (iconv (cd, &inptr, &insize, &outptr, &outsize) == (size_t) -1); -+ TEST_VERIFY (errno == EINVAL); -+ /* The conversion should produce one character, the converted null -+ character. 
*/ -+ TEST_VERIFY (sizeof (output) - outsize == 1); -+ -+ TEST_VERIFY_EXIT (iconv_close (cd) != -1); -+ -+ return 0; -+} -+ -+#include <support/test-driver.c> -diff --git a/iconvdata/bug-iconv14.c b/iconvdata/bug-iconv14.c -new file mode 100644 -index 0000000000..902f140fa9 ---- /dev/null -+++ b/iconvdata/bug-iconv14.c -@@ -0,0 +1,127 @@ -+/* Assertion in ISO-2022-JP-3 due to two-character sequence (bug 27256). -+ Copyright (C) 2021 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ <https://www.gnu.org/licenses/>. */ -+ -+#include <iconv.h> -+#include <string.h> -+#include <errno.h> -+#include <support/check.h> -+ -+/* Use an escape sequence to return to the initial state. */ -+static void -+with_escape_sequence (void) -+{ -+ iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3"); -+ TEST_VERIFY_EXIT (c != (iconv_t) -1); -+ -+ char in[] = "\e$(O+D\e(B"; -+ char *inbuf = in; -+ size_t inleft = strlen (in); -+ char out[3]; /* Space for one output character. 
*/ -+ char *outbuf; -+ size_t outleft; -+ -+ outbuf = out; -+ outleft = sizeof (out); -+ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1); -+ TEST_COMPARE (errno, E2BIG); -+ TEST_COMPARE (inleft, 3); -+ TEST_COMPARE (inbuf - in, strlen (in) - 3); -+ TEST_COMPARE (outleft, sizeof (out) - 2); -+ TEST_COMPARE (outbuf - out, 2); -+ TEST_COMPARE (out[0] & 0xff, 0xc3); -+ TEST_COMPARE (out[1] & 0xff, 0xa6); -+ -+ /* Return to the initial shift state, producing the pending -+ character. */ -+ outbuf = out; -+ outleft = sizeof (out); -+ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), 0); -+ TEST_COMPARE (inleft, 0); -+ TEST_COMPARE (inbuf - in, strlen (in)); -+ TEST_COMPARE (outleft, sizeof (out) - 2); -+ TEST_COMPARE (outbuf - out, 2); -+ TEST_COMPARE (out[0] & 0xff, 0xcc); -+ TEST_COMPARE (out[1] & 0xff, 0x80); -+ -+ /* Nothing should be flushed the second time. */ -+ outbuf = out; -+ outleft = sizeof (out); -+ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0); -+ TEST_COMPARE (outleft, sizeof (out)); -+ TEST_COMPARE (outbuf - out, 0); -+ TEST_COMPARE (out[0] & 0xff, 0xcc); -+ TEST_COMPARE (out[1] & 0xff, 0x80); -+ -+ TEST_COMPARE (iconv_close (c), 0); -+} -+ -+/* Use an explicit flush to return to the initial state. */ -+static void -+with_flush (void) -+{ -+ iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3"); -+ TEST_VERIFY_EXIT (c != (iconv_t) -1); -+ -+ char in[] = "\e$(O+D"; -+ char *inbuf = in; -+ size_t inleft = strlen (in); -+ char out[3]; /* Space for one output character. 
*/ -+ char *outbuf; -+ size_t outleft; -+ -+ outbuf = out; -+ outleft = sizeof (out); -+ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1); -+ TEST_COMPARE (errno, E2BIG); -+ TEST_COMPARE (inleft, 0); -+ TEST_COMPARE (inbuf - in, strlen (in)); -+ TEST_COMPARE (outleft, sizeof (out) - 2); -+ TEST_COMPARE (outbuf - out, 2); -+ TEST_COMPARE (out[0] & 0xff, 0xc3); -+ TEST_COMPARE (out[1] & 0xff, 0xa6); -+ -+ /* Flush the pending character. */ -+ outbuf = out; -+ outleft = sizeof (out); -+ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0); -+ TEST_COMPARE (outleft, sizeof (out) - 2); -+ TEST_COMPARE (outbuf - out, 2); -+ TEST_COMPARE (out[0] & 0xff, 0xcc); -+ TEST_COMPARE (out[1] & 0xff, 0x80); -+ -+ /* Nothing should be flushed the second time. */ -+ outbuf = out; -+ outleft = sizeof (out); -+ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0); -+ TEST_COMPARE (outleft, sizeof (out)); -+ TEST_COMPARE (outbuf - out, 0); -+ TEST_COMPARE (out[0] & 0xff, 0xcc); -+ TEST_COMPARE (out[1] & 0xff, 0x80); -+ -+ TEST_COMPARE (iconv_close (c), 0); -+} -+ -+static int -+do_test (void) -+{ -+ with_escape_sequence (); -+ with_flush (); -+ return 0; -+} -+ -+#include <support/test-driver.c> -diff --git a/iconvdata/bug-iconv15.c b/iconvdata/bug-iconv15.c -new file mode 100644 -index 0000000000..cc04bd0313 ---- /dev/null -+++ b/iconvdata/bug-iconv15.c -@@ -0,0 +1,60 @@ -+/* Bug 28524: Conversion from ISO-2022-JP-3 with iconv -+ may emit spurious NUL character on state reset. -+ Copyright (C) The GNU Toolchain Authors. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. 
-+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ <https://www.gnu.org/licenses/>. */ -+ -+#include <stddef.h> -+#include <iconv.h> -+#include <support/check.h> -+ -+static int -+do_test (void) -+{ -+ char in[] = "\x1b(I"; -+ char *inbuf = in; -+ size_t inleft = sizeof (in) - 1; -+ char out[1]; -+ char *outbuf = out; -+ size_t outleft = sizeof (out); -+ iconv_t cd; -+ -+ cd = iconv_open ("UTF8", "ISO-2022-JP-3"); -+ TEST_VERIFY_EXIT (cd != (iconv_t) -1); -+ -+ /* First call to iconv should alter internal state. -+ Now, JISX0201_Kana_set is selected and -+ state value != ASCII_set. */ -+ TEST_VERIFY (iconv (cd, &inbuf, &inleft, &outbuf, &outleft) != (size_t) -1); -+ -+ /* No bytes should have been added to -+ the output buffer at this point. */ -+ TEST_VERIFY (outbuf == out); -+ TEST_VERIFY (outleft == sizeof (out)); -+ -+ /* Second call shall emit spurious NUL character in unpatched glibc. */ -+ TEST_VERIFY (iconv (cd, NULL, NULL, &outbuf, &outleft) != (size_t) -1); -+ -+ /* No characters are expected to be produced. */ -+ TEST_VERIFY (outbuf == out); -+ TEST_VERIFY (outleft == sizeof (out)); -+ -+ TEST_VERIFY_EXIT (iconv_close (cd) != -1); -+ -+ return 0; -+} -+ -+#include <support/test-driver.c> -diff --git a/iconvdata/euc-kr.c b/iconvdata/euc-kr.c -index b0d56cf3ee..1045bae926 100644 ---- a/iconvdata/euc-kr.c -+++ b/iconvdata/euc-kr.c -@@ -80,11 +80,7 @@ euckr_from_ucs4 (uint32_t ch, unsigned char *cp) - \ - if (ch <= 0x9f) \ - ++inptr; \ -- /* 0xfe(->0x7e : row 94) and 0xc9(->0x59 : row 41) are \ -- user-defined areas. 
*/ \ -- else if (__builtin_expect (ch == 0xa0, 0) \ -- || __builtin_expect (ch > 0xfe, 0) \ -- || __builtin_expect (ch == 0xc9, 0)) \ -+ else if (__glibc_unlikely (ch == 0xa0)) \ - { \ - /* This is illegal. */ \ - STANDARD_FROM_LOOP_ERR_HANDLER (1); \ -diff --git a/iconvdata/ibm1364.c b/iconvdata/ibm1364.c -index 49e7267ab4..521f0825b7 100644 ---- a/iconvdata/ibm1364.c -+++ b/iconvdata/ibm1364.c -@@ -158,24 +158,14 @@ enum - \ - if (__builtin_expect (ch, 0) == SO) \ - { \ -- /* Shift OUT, change to DBCS converter. */ \ -- if (curcs == db) \ -- { \ -- result = __GCONV_ILLEGAL_INPUT; \ -- break; \ -- } \ -+ /* Shift OUT, change to DBCS converter (redundant escape okay). */ \ - curcs = db; \ - ++inptr; \ - continue; \ - } \ - if (__builtin_expect (ch, 0) == SI) \ - { \ -- /* Shift IN, change to SBCS converter. */ \ -- if (curcs == sb) \ -- { \ -- result = __GCONV_ILLEGAL_INPUT; \ -- break; \ -- } \ -+ /* Shift IN, change to SBCS converter (redundant escape okay). */ \ - curcs = sb; \ - ++inptr; \ - continue; \ -diff --git a/iconvdata/iso-2022-jp-3.c b/iconvdata/iso-2022-jp-3.c -index 8c3b7e627e..c7b470db61 100644 ---- a/iconvdata/iso-2022-jp-3.c -+++ b/iconvdata/iso-2022-jp-3.c -@@ -1,5 +1,6 @@ - /* Conversion module for ISO-2022-JP-3. - Copyright (C) 1998-2020 Free Software Foundation, Inc. -+ Copyright (C) The GNU Toolchain Authors. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998, - and Bruno Haible <bruno@clisp.org>, 2002. -@@ -67,10 +68,15 @@ enum - CURRENT_SEL_MASK = 7 << 3 - }; - --/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the state -- also contains the last two bytes to be output, shifted by 6 bits, and a -- one-bit indicator whether they must be preceded by the shift sequence, -- in bit 22. 
*/ -+/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the -+ state also contains the last two bytes to be output, shifted by 6 -+ bits, and a one-bit indicator whether they must be preceded by the -+ shift sequence, in bit 22. During ISO-2022-JP-3 to UCS-4 -+ conversion, COUNT may also contain a non-zero pending wide -+ character, shifted by six bits. This happens for certain inputs in -+ JISX0213_1_2004_set and JISX0213_2_set if the second wide character -+ in a combining sequence cannot be written because the buffer is -+ full. */ - - /* Since this is a stateful encoding we have to provide code which resets - the output state to the initial state. This has to be done during the -@@ -80,10 +86,27 @@ enum - { \ - if (FROM_DIRECTION) \ - { \ -- /* It's easy, we don't have to emit anything, we just reset the \ -- state for the input. */ \ -- data->__statep->__count &= 7; \ -- data->__statep->__count |= ASCII_set; \ -+ uint32_t ch = data->__statep->__count >> 6; \ -+ \ -+ if (__glibc_unlikely (ch != 0)) \ -+ { \ -+ if (__glibc_likely (outbuf + 4 <= outend)) \ -+ { \ -+ /* Write out the last character. */ \ -+ put32u (outbuf, ch); \ -+ outbuf += 4; \ -+ data->__statep->__count &= 7; \ -+ data->__statep->__count |= ASCII_set; \ -+ } \ -+ else \ -+ /* We don't have enough room in the output buffer. */ \ -+ status = __GCONV_FULL_OUTPUT; \ -+ } \ -+ else \ -+ { \ -+ data->__statep->__count &= 7; \ -+ data->__statep->__count |= ASCII_set; \ -+ } \ - } \ - else \ - { \ -@@ -151,7 +174,21 @@ enum - #define LOOPFCT FROM_LOOP - #define BODY \ - { \ -- uint32_t ch = *inptr; \ -+ uint32_t ch; \ -+ \ -+ /* Output any pending character. */ \ -+ ch = set >> 6; \ -+ if (__glibc_unlikely (ch != 0)) \ -+ { \ -+ put32 (outptr, ch); \ -+ outptr += 4; \ -+ /* Remove the pending character, but preserve state bits. */ \ -+ set &= (1 << 6) - 1; \ -+ continue; \ -+ } \ -+ \ -+ /* Otherwise read the next input byte. */ \ -+ ch = *inptr; \ - \ - /* Recognize escape sequences. 
*/ \ - if (__glibc_unlikely (ch == ESC)) \ -@@ -297,21 +334,25 @@ enum - uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \ - uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \ - \ -+ inptr += 2; \ -+ \ -+ put32 (outptr, u1); \ -+ outptr += 4; \ -+ \ - /* See whether we have room for two characters. */ \ -- if (outptr + 8 <= outend) \ -+ if (outptr + 4 <= outend) \ - { \ -- inptr += 2; \ -- put32 (outptr, u1); \ -- outptr += 4; \ - put32 (outptr, u2); \ - outptr += 4; \ - continue; \ - } \ -- else \ -- { \ -- result = __GCONV_FULL_OUTPUT; \ -- break; \ -- } \ -+ \ -+ /* Otherwise store only the first character now, and \ -+ put the second one into the queue. */ \ -+ set |= u2 << 6; \ -+ /* Tell the caller why we terminate the loop. */ \ -+ result = __GCONV_FULL_OUTPUT; \ -+ break; \ - } \ - \ - inptr += 2; \ -diff --git a/iconvdata/ksc5601.h b/iconvdata/ksc5601.h -index d3eb3a4ff8..f5cdc72797 100644 ---- a/iconvdata/ksc5601.h -+++ b/iconvdata/ksc5601.h -@@ -50,15 +50,15 @@ ksc5601_to_ucs4 (const unsigned char **s, size_t avail, unsigned char offset) - unsigned char ch2; - int idx; - -+ if (avail < 2) -+ return 0; -+ - /* row 94(0x7e) and row 41(0x49) are user-defined area in KS C 5601 */ - - if (ch < offset || (ch - offset) <= 0x20 || (ch - offset) >= 0x7e - || (ch - offset) == 0x49) - return __UNKNOWN_10646_CHAR; - -- if (avail < 2) -- return 0; -- - ch2 = (*s)[1]; - if (ch2 < offset || (ch2 - offset) <= 0x20 || (ch2 - offset) >= 0x7f) - return __UNKNOWN_10646_CHAR; -diff --git a/intl/dcigettext.c b/intl/dcigettext.c -index 2e7c662bc7..bd332e71da 100644 ---- a/intl/dcigettext.c -+++ b/intl/dcigettext.c -@@ -1120,15 +1120,18 @@ _nl_find_msg (struct loaded_l10nfile *domain_file, - - # ifdef _LIBC - -- struct gconv_spec conv_spec -- = { .fromcode = norm_add_slashes (charset, ""), -- .tocode = norm_add_slashes (outcharset, ""), -- /* We always want to use transliteration. 
*/ -- .translit = true, -- .ignore = false -- }; -+ struct gconv_spec conv_spec; -+ -+ __gconv_create_spec (&conv_spec, charset, outcharset); -+ -+ /* We always want to use transliteration. */ -+ conv_spec.translit = true; -+ - int r = __gconv_open (&conv_spec, &convd->conv, - GCONV_AVOID_NOCONV); -+ -+ __gconv_destroy_spec (&conv_spec); -+ - if (__builtin_expect (r != __GCONV_OK, 0)) - { - /* If the output encoding is the same there is -diff --git a/intl/tst-codeset.c b/intl/tst-codeset.c -index fd70432eca..e9f6e5e09f 100644 ---- a/intl/tst-codeset.c -+++ b/intl/tst-codeset.c -@@ -22,13 +22,11 @@ - #include <stdio.h> - #include <stdlib.h> - #include <string.h> -+#include <support/check.h> - - static int - do_test (void) - { -- char *s; -- int result = 0; -- - unsetenv ("LANGUAGE"); - unsetenv ("OUTPUT_CHARSET"); - setlocale (LC_ALL, "de_DE.ISO-8859-1"); -@@ -36,25 +34,21 @@ do_test (void) - bindtextdomain ("codeset", OBJPFX "domaindir"); - - /* Here we expect output in ISO-8859-1. */ -- s = gettext ("cheese"); -- if (strcmp (s, "K\344se")) -- { -- printf ("call 1 returned: %s\n", s); -- result = 1; -- } -+ TEST_COMPARE_STRING (gettext ("cheese"), "K\344se"); - -+ /* Here we expect output in UTF-8. */ - bind_textdomain_codeset ("codeset", "UTF-8"); -+ TEST_COMPARE_STRING (gettext ("cheese"), "K\303\244se"); - -- /* Here we expect output in UTF-8. */ -- s = gettext ("cheese"); -- if (strcmp (s, "K\303\244se")) -- { -- printf ("call 2 returned: %s\n", s); -- result = 1; -- } -- -- return result; -+ /* `a with umlaut' is transliterated to `ae'. */ -+ bind_textdomain_codeset ("codeset", "ASCII//TRANSLIT"); -+ TEST_COMPARE_STRING (gettext ("cheese"), "Kaese"); -+ -+ /* Transliteration also works by default even if not set. 
*/ -+ bind_textdomain_codeset ("codeset", "ASCII"); -+ TEST_COMPARE_STRING (gettext ("cheese"), "Kaese"); -+ -+ return 0; - } - --#define TEST_FUNCTION do_test () --#include "../test-skeleton.c" -+#include <support/test-driver.c> -diff --git a/malloc/Makefile b/malloc/Makefile -index e22cbde22d..5093e8730e 100644 ---- a/malloc/Makefile -+++ b/malloc/Makefile -@@ -62,6 +62,16 @@ endif - tests += $(tests-static) - test-srcs = tst-mtrace - -+# These tests either are run with MALLOC_CHECK_=3 by default or do not work -+# with MALLOC_CHECK_=3 because they expect a specific failure. -+tests-exclude-mcheck = tst-mcheck tst-malloc-usable \ -+ tst-interpose-nothread tst-interpose-static-nothread \ -+ tst-interpose-static-thread tst-malloc-too-large \ -+ tst-mxfast tst-safe-linking -+ -+# Run all tests with MALLOC_CHECK_=3 -+tests-mcheck = $(filter-out $(tests-exclude-mcheck),$(tests)) -+ - routines = malloc morecore mcheck mtrace obstack reallocarray \ - scratch_buffer_grow scratch_buffer_grow_preserve \ - scratch_buffer_set_array_size \ -@@ -100,6 +110,11 @@ $(objpfx)tst-malloc-thread-exit: $(shared-thread-library) - $(objpfx)tst-malloc-thread-fail: $(shared-thread-library) - $(objpfx)tst-malloc-fork-deadlock: $(shared-thread-library) - $(objpfx)tst-malloc-stats-cancellation: $(shared-thread-library) -+$(objpfx)tst-malloc-backtrace-mcheck: $(shared-thread-library) -+$(objpfx)tst-malloc-thread-exit-mcheck: $(shared-thread-library) -+$(objpfx)tst-malloc-thread-fail-mcheck: $(shared-thread-library) -+$(objpfx)tst-malloc-fork-deadlock-mcheck: $(shared-thread-library) -+$(objpfx)tst-malloc-stats-cancellation-mcheck: $(shared-thread-library) - - # Export the __malloc_initialize_hook variable to libc.so. 
- LDFLAGS-tst-mallocstate = -rdynamic -@@ -239,6 +254,8 @@ $(tests:%=$(objpfx)%.o): CPPFLAGS += -DTEST_NO_MALLOPT - $(objpfx)tst-interpose-nothread: $(objpfx)tst-interpose-aux-nothread.o - $(objpfx)tst-interpose-thread: \ - $(objpfx)tst-interpose-aux-thread.o $(shared-thread-library) -+$(objpfx)tst-interpose-thread-mcheck: \ -+ $(objpfx)tst-interpose-aux-thread.o $(shared-thread-library) - $(objpfx)tst-interpose-static-nothread: $(objpfx)tst-interpose-aux-nothread.o - $(objpfx)tst-interpose-static-thread: \ - $(objpfx)tst-interpose-aux-thread.o $(static-thread-library) -@@ -256,3 +273,6 @@ $(objpfx)tst-dynarray-fail-mem.out: $(objpfx)tst-dynarray-fail.out - $(objpfx)tst-malloc-tcache-leak: $(shared-thread-library) - $(objpfx)tst-malloc_info: $(shared-thread-library) - $(objpfx)tst-mallocfork2: $(shared-thread-library) -+$(objpfx)tst-malloc-tcache-leak-mcheck: $(shared-thread-library) -+$(objpfx)tst-malloc_info-mcheck: $(shared-thread-library) -+$(objpfx)tst-mallocfork2-mcheck: $(shared-thread-library) -diff --git a/manual/tunables.texi b/manual/tunables.texi -index 23ef0d40e7..d72d7a5ec0 100644 ---- a/manual/tunables.texi -+++ b/manual/tunables.texi -@@ -432,7 +432,11 @@ set shared cache size in bytes for use in memory and string routines. - - @deftp Tunable glibc.cpu.x86_non_temporal_threshold - The @code{glibc.cpu.x86_non_temporal_threshold} tunable allows the user --to set threshold in bytes for non temporal store. -+to set threshold in bytes for non temporal store. Non temporal stores -+give a hint to the hardware to move data directly to memory without -+displacing other data from the cache. This tunable is used by some -+platforms to determine when to use non temporal stores in operations -+like memmove and memcpy. - - This tunable is specific to i386 and x86-64. 
- @end deftp -diff --git a/misc/sys/cdefs.h b/misc/sys/cdefs.h -index 19d9cc5cfe..38221d0b2a 100644 ---- a/misc/sys/cdefs.h -+++ b/misc/sys/cdefs.h -@@ -124,13 +124,10 @@ - #define __bos0(ptr) __builtin_object_size (ptr, 0) - - #if __GNUC_PREREQ (4,3) --# define __warndecl(name, msg) \ -- extern void name (void) __attribute__((__warning__ (msg))) - # define __warnattr(msg) __attribute__((__warning__ (msg))) - # define __errordecl(name, msg) \ - extern void name (void) __attribute__((__error__ (msg))) - #else --# define __warndecl(name, msg) extern void name (void) - # define __warnattr(msg) - # define __errordecl(name, msg) extern void name (void) - #endif -diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c -index 2cba3da38c..c217cda608 100644 ---- a/nptl/pthread_create.c -+++ b/nptl/pthread_create.c -@@ -416,8 +416,6 @@ START_THREAD_DEFN - unwind_buf.priv.data.prev = NULL; - unwind_buf.priv.data.cleanup = NULL; - -- __libc_signal_restore_set (&pd->sigmask); -- - /* Allow setxid from now onwards. */ - if (__glibc_unlikely (atomic_exchange_acq (&pd->setxid_futex, 0) == -2)) - futex_wake (&pd->setxid_futex, 1, FUTEX_PRIVATE); -@@ -427,6 +425,8 @@ START_THREAD_DEFN - /* Store the new cleanup handler info. */ - THREAD_SETMEM (pd, cleanup_jmp_buf, &unwind_buf); - -+ __libc_signal_restore_set (&pd->sigmask); -+ - /* We are either in (a) or (b), and in either case we either own - PD already (2) or are about to own PD (1), and so our only - restriction would be that we can't free PD until we know we -diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c -index 88c69d1e9c..381aa721ef 100644 ---- a/nscd/netgroupcache.c -+++ b/nscd/netgroupcache.c -@@ -248,7 +248,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, - : NULL); - ndomain = (ndomain ? 
newbuf + ndomaindiff - : NULL); -- buffer = newbuf; -+ *tofreep = buffer = newbuf; - } - - nhost = memcpy (buffer + bufused, -@@ -319,7 +319,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, - else if (status == NSS_STATUS_TRYAGAIN && e == ERANGE) - { - buflen *= 2; -- buffer = xrealloc (buffer, buflen); -+ *tofreep = buffer = xrealloc (buffer, buflen); - } - else if (status == NSS_STATUS_RETURN - || status == NSS_STATUS_NOTFOUND -diff --git a/nss/tst-nss-files-hosts-long.root/etc/nsswitch.conf b/nss/tst-nss-files-hosts-long.root/etc/nsswitch.conf -new file mode 100644 -index 0000000000..5b0c6a4199 ---- /dev/null -+++ b/nss/tst-nss-files-hosts-long.root/etc/nsswitch.conf -@@ -0,0 +1 @@ -+hosts: files -diff --git a/posix/bits/unistd.h b/posix/bits/unistd.h -index 725a83eb0d..7e5bb6fb1e 100644 ---- a/posix/bits/unistd.h -+++ b/posix/bits/unistd.h -@@ -193,10 +193,9 @@ __NTH (readlinkat (int __fd, const char *__restrict __path, - #endif - - extern char *__getcwd_chk (char *__buf, size_t __size, size_t __buflen) -- __THROW __wur __attr_access ((__write_only__, 1, 2)); -+ __THROW __wur; - extern char *__REDIRECT_NTH (__getcwd_alias, -- (char *__buf, size_t __size), getcwd) -- __wur __attr_access ((__write_only__, 1, 2)); -+ (char *__buf, size_t __size), getcwd) __wur; - extern char *__REDIRECT_NTH (__getcwd_chk_warn, - (char *__buf, size_t __size, size_t __buflen), - __getcwd_chk) -diff --git a/posix/unistd.h b/posix/unistd.h -index 32b8161619..acf9ee7e79 100644 ---- a/posix/unistd.h -+++ b/posix/unistd.h -@@ -517,8 +517,7 @@ extern int fchdir (int __fd) __THROW __wur; - an array is allocated with `malloc'; the array is SIZE - bytes long, unless SIZE == 0, in which case it is as - big as necessary. 
*/ --extern char *getcwd (char *__buf, size_t __size) __THROW __wur -- __attr_access ((__write_only__, 1, 2)); -+extern char *getcwd (char *__buf, size_t __size) __THROW __wur; - - #ifdef __USE_GNU - /* Return a malloc'd string containing the current directory name. -@@ -831,7 +830,7 @@ extern int symlinkat (const char *__from, int __tofd, - /* Like readlink but a relative PATH is interpreted relative to FD. */ - extern ssize_t readlinkat (int __fd, const char *__restrict __path, - char *__restrict __buf, size_t __len) -- __THROW __nonnull ((2, 3)) __wur __attr_access ((__read_only__, 3, 4)); -+ __THROW __nonnull ((2, 3)) __wur __attr_access ((__write_only__, 3, 4)); - #endif - - /* Remove the link NAME. */ -diff --git a/posix/wordexp-test.c b/posix/wordexp-test.c -index ed1b22308e..cb3f989cba 100644 ---- a/posix/wordexp-test.c -+++ b/posix/wordexp-test.c -@@ -183,6 +183,7 @@ struct test_case_struct - { 0, NULL, "$var", 0, 0, { NULL, }, IFS }, - { 0, NULL, "\"\\n\"", 0, 1, { "\\n", }, IFS }, - { 0, NULL, "", 0, 0, { NULL, }, IFS }, -+ { 0, NULL, "${1234567890123456789012}", 0, 0, { NULL, }, IFS }, - - /* Flags not already covered (testit() has special handling for these) */ - { 0, NULL, "one two", WRDE_DOOFFS, 2, { "one", "two", }, IFS }, -diff --git a/posix/wordexp.c b/posix/wordexp.c -index e082d94895..56289503a1 100644 ---- a/posix/wordexp.c -+++ b/posix/wordexp.c -@@ -1399,7 +1399,7 @@ envsubst: - /* Is it a numeric parameter? */ - else if (isdigit (env[0])) - { -- int n = atoi (env); -+ unsigned long n = strtoul (env, NULL, 10); - - if (n >= __libc_argc) - /* Substitute NULL. */ -diff --git a/resolv/Makefile b/resolv/Makefile -index b61c0c3e0c..dbd8f8bf4f 100644 ---- a/resolv/Makefile -+++ b/resolv/Makefile -@@ -61,6 +61,11 @@ tests += \ - tst-resolv-search \ - tst-resolv-trailing \ - -+# This test calls __res_context_send directly, which is not exported -+# from libresolv. 
-+tests-internal += tst-resolv-txnid-collision -+tests-static += tst-resolv-txnid-collision -+ - # These tests need libdl. - ifeq (yes,$(build-shared)) - tests += \ -@@ -191,6 +196,8 @@ $(objpfx)tst-resolv-search: $(objpfx)libresolv.so $(shared-thread-library) - $(objpfx)tst-resolv-trailing: $(objpfx)libresolv.so $(shared-thread-library) - $(objpfx)tst-resolv-threads: \ - $(libdl) $(objpfx)libresolv.so $(shared-thread-library) -+$(objpfx)tst-resolv-txnid-collision: $(objpfx)libresolv.a \ -+ $(static-thread-library) - $(objpfx)tst-resolv-canonname: \ - $(libdl) $(objpfx)libresolv.so $(shared-thread-library) - $(objpfx)tst-resolv-trustad: $(objpfx)libresolv.so $(shared-thread-library) -diff --git a/resolv/res_send.c b/resolv/res_send.c -index 7e5fec6646..70e5066031 100644 ---- a/resolv/res_send.c -+++ b/resolv/res_send.c -@@ -1342,15 +1342,6 @@ send_dg(res_state statp, - *terrno = EMSGSIZE; - return close_and_return_error (statp, resplen2); - } -- if ((recvresp1 || hp->id != anhp->id) -- && (recvresp2 || hp2->id != anhp->id)) { -- /* -- * response from old query, ignore it. -- * XXX - potential security hazard could -- * be detected here. -- */ -- goto wait; -- } - - /* Paranoia check. Due to the connected UDP socket, - the kernel has already filtered invalid addresses -@@ -1360,15 +1351,24 @@ send_dg(res_state statp, - - /* Check for the correct header layout and a matching - question. */ -- if ((recvresp1 || !res_queriesmatch(buf, buf + buflen, -- *thisansp, -- *thisansp -- + *thisanssizp)) -- && (recvresp2 || !res_queriesmatch(buf2, buf2 + buflen2, -- *thisansp, -- *thisansp -- + *thisanssizp))) -- goto wait; -+ int matching_query = 0; /* Default to no matching query. 
*/ -+ if (!recvresp1 -+ && anhp->id == hp->id -+ && res_queriesmatch (buf, buf + buflen, -+ *thisansp, *thisansp + *thisanssizp)) -+ matching_query = 1; -+ if (!recvresp2 -+ && anhp->id == hp2->id -+ && res_queriesmatch (buf2, buf2 + buflen2, -+ *thisansp, *thisansp + *thisanssizp)) -+ matching_query = 2; -+ if (matching_query == 0) -+ /* Spurious UDP packet. Drop it and continue -+ waiting. */ -+ { -+ need_recompute = 1; -+ goto wait; -+ } - - if (anhp->rcode == SERVFAIL || - anhp->rcode == NOTIMP || -@@ -1383,7 +1383,7 @@ send_dg(res_state statp, - /* No data from the first reply. */ - resplen = 0; - /* We are waiting for a possible second reply. */ -- if (hp->id == anhp->id) -+ if (matching_query == 1) - recvresp1 = 1; - else - recvresp2 = 1; -@@ -1414,7 +1414,7 @@ send_dg(res_state statp, - return (1); - } - /* Mark which reply we received. */ -- if (recvresp1 == 0 && hp->id == anhp->id) -+ if (matching_query == 1) - recvresp1 = 1; - else - recvresp2 = 1; -diff --git a/resolv/tst-resolv-txnid-collision.c b/resolv/tst-resolv-txnid-collision.c -new file mode 100644 -index 0000000000..189b76f126 ---- /dev/null -+++ b/resolv/tst-resolv-txnid-collision.c -@@ -0,0 +1,334 @@ -+/* Test parallel queries with transaction ID collisions. -+ Copyright (C) 2020 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. 
-+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ <https://www.gnu.org/licenses/>. */ -+ -+#include <arpa/nameser.h> -+#include <array_length.h> -+#include <resolv-internal.h> -+#include <resolv_context.h> -+#include <stdbool.h> -+#include <stdio.h> -+#include <string.h> -+#include <support/check.h> -+#include <support/check_nss.h> -+#include <support/resolv_test.h> -+#include <support/support.h> -+#include <support/test-driver.h> -+ -+/* Result of parsing a DNS question name. -+ -+ A question name has the form reorder-N-M-rcode-C.example.net, where -+ N and M are either 0 and 1, corresponding to the reorder member, -+ and C is a number that will be stored in the rcode field. -+ -+ Also see parse_qname below. */ -+struct parsed_qname -+{ -+ /* The DNS response code requested from the first server. The -+ second server always responds with RCODE zero. */ -+ int rcode; -+ -+ /* Indicates whether to perform reordering in the responses from the -+ respective server. */ -+ bool reorder[2]; -+}; -+ -+/* Fills *PARSED based on QNAME. */ -+static void -+parse_qname (struct parsed_qname *parsed, const char *qname) -+{ -+ int reorder0; -+ int reorder1; -+ int rcode; -+ char *suffix; -+ if (sscanf (qname, "reorder-%d-%d.rcode-%d.%ms", -+ &reorder0, &reorder1, &rcode, &suffix) == 4) -+ { -+ if (reorder0 != 0) -+ TEST_COMPARE (reorder0, 1); -+ if (reorder1 != 0) -+ TEST_COMPARE (reorder1, 1); -+ TEST_VERIFY (rcode >= 0 && rcode <= 15); -+ TEST_COMPARE_STRING (suffix, "example.net"); -+ free (suffix); -+ -+ parsed->rcode = rcode; -+ parsed->reorder[0] = reorder0; -+ parsed->reorder[1] = reorder1; -+ } -+ else -+ FAIL_EXIT1 ("unexpected query: %s", qname); -+} -+ -+/* Used to construct a response. The first server responds with an -+ error, the second server succeeds. 
*/ -+static void -+build_response (const struct resolv_response_context *ctx, -+ struct resolv_response_builder *b, -+ const char *qname, uint16_t qclass, uint16_t qtype) -+{ -+ struct parsed_qname parsed; -+ parse_qname (&parsed, qname); -+ -+ switch (ctx->server_index) -+ { -+ case 0: -+ { -+ struct resolv_response_flags flags = { 0 }; -+ if (parsed.rcode == 0) -+ /* Simulate a delegation in case a NODATA (RCODE zero) -+ response is requested. */ -+ flags.clear_ra = true; -+ else -+ flags.rcode = parsed.rcode; -+ -+ resolv_response_init (b, flags); -+ resolv_response_add_question (b, qname, qclass, qtype); -+ } -+ break; -+ -+ case 1: -+ { -+ struct resolv_response_flags flags = { 0, }; -+ resolv_response_init (b, flags); -+ resolv_response_add_question (b, qname, qclass, qtype); -+ -+ resolv_response_section (b, ns_s_an); -+ resolv_response_open_record (b, qname, qclass, qtype, 0); -+ if (qtype == T_A) -+ { -+ char ipv4[4] = { 192, 0, 2, 1 }; -+ resolv_response_add_data (b, &ipv4, sizeof (ipv4)); -+ } -+ else -+ { -+ char ipv6[16] -+ = { 0x20, 0x01, 0xd, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; -+ resolv_response_add_data (b, &ipv6, sizeof (ipv6)); -+ } -+ resolv_response_close_record (b); -+ } -+ break; -+ } -+} -+ -+/* Used to reorder responses. */ -+struct resolv_response_context *previous_query; -+ -+/* Used to keep track of the queries received. */ -+static int previous_server_index = -1; -+static uint16_t previous_qtype; -+ -+/* For each server, buffer the first query and then send both answers -+ to the second query, reordered if requested. 
*/ -+static void -+response (const struct resolv_response_context *ctx, -+ struct resolv_response_builder *b, -+ const char *qname, uint16_t qclass, uint16_t qtype) -+{ -+ TEST_VERIFY (qtype == T_A || qtype == T_AAAA); -+ if (ctx->server_index != 0) -+ TEST_COMPARE (ctx->server_index, 1); -+ -+ struct parsed_qname parsed; -+ parse_qname (&parsed, qname); -+ -+ if (previous_query == NULL) -+ { -+ /* No buffered query. Record this query and do not send a -+ response. */ -+ TEST_COMPARE (previous_qtype, 0); -+ previous_query = resolv_response_context_duplicate (ctx); -+ previous_qtype = qtype; -+ resolv_response_drop (b); -+ previous_server_index = ctx->server_index; -+ -+ if (test_verbose) -+ printf ("info: buffering first query for: %s\n", qname); -+ } -+ else -+ { -+ TEST_VERIFY (previous_query != 0); -+ TEST_COMPARE (ctx->server_index, previous_server_index); -+ TEST_VERIFY (previous_qtype != qtype); /* Not a duplicate. */ -+ -+ /* If reordering, send a response for this query explicitly, and -+ then skip the implicit send. */ -+ if (parsed.reorder[ctx->server_index]) -+ { -+ if (test_verbose) -+ printf ("info: sending reordered second response for: %s\n", -+ qname); -+ build_response (ctx, b, qname, qclass, qtype); -+ resolv_response_send_udp (ctx, b); -+ resolv_response_drop (b); -+ } -+ -+ /* Build a response for the previous query and send it, thus -+ reordering the two responses. */ -+ { -+ if (test_verbose) -+ printf ("info: sending first response for: %s\n", qname); -+ struct resolv_response_builder *btmp -+ = resolv_response_builder_allocate (previous_query->query_buffer, -+ previous_query->query_length); -+ build_response (ctx, btmp, qname, qclass, previous_qtype); -+ resolv_response_send_udp (ctx, btmp); -+ resolv_response_builder_free (btmp); -+ } -+ -+ /* If not reordering, send the reply as usual. 
*/ -+ if (!parsed.reorder[ctx->server_index]) -+ { -+ if (test_verbose) -+ printf ("info: sending non-reordered second response for: %s\n", -+ qname); -+ build_response (ctx, b, qname, qclass, qtype); -+ } -+ -+ /* Unbuffer the response and prepare for the next query. */ -+ resolv_response_context_free (previous_query); -+ previous_query = NULL; -+ previous_qtype = 0; -+ previous_server_index = -1; -+ } -+} -+ -+/* Runs a query for QNAME and checks for the expected reply. See -+ struct parsed_qname for the expected format for QNAME. */ -+static void -+test_qname (const char *qname, int rcode) -+{ -+ struct resolv_context *ctx = __resolv_context_get (); -+ TEST_VERIFY_EXIT (ctx != NULL); -+ -+ unsigned char q1[512]; -+ int q1len = res_mkquery (QUERY, qname, C_IN, T_A, NULL, 0, NULL, -+ q1, sizeof (q1)); -+ TEST_VERIFY_EXIT (q1len > 12); -+ -+ unsigned char q2[512]; -+ int q2len = res_mkquery (QUERY, qname, C_IN, T_AAAA, NULL, 0, NULL, -+ q2, sizeof (q2)); -+ TEST_VERIFY_EXIT (q2len > 12); -+ -+ /* Produce a transaction ID collision. */ -+ memcpy (q2, q1, 2); -+ -+ unsigned char ans1[512]; -+ unsigned char *ans1p = ans1; -+ unsigned char *ans2p = NULL; -+ int nans2p = 0; -+ int resplen2 = 0; -+ int ans2p_malloced = 0; -+ -+ /* Perform a parallel A/AAAA query. */ -+ int resplen1 = __res_context_send (ctx, q1, q1len, q2, q2len, -+ ans1, sizeof (ans1), &ans1p, -+ &ans2p, &nans2p, -+ &resplen2, &ans2p_malloced); -+ -+ TEST_VERIFY (resplen1 > 12); -+ TEST_VERIFY (resplen2 > 12); -+ if (resplen1 <= 12 || resplen2 <= 12) -+ return; -+ -+ if (rcode == 1 || rcode == 3) -+ { -+ /* Format Error and Name Error responses does not trigger -+ switching to the next server. */ -+ TEST_COMPARE (ans1p[3] & 0x0f, rcode); -+ TEST_COMPARE (ans2p[3] & 0x0f, rcode); -+ return; -+ } -+ -+ /* The response should be successful. */ -+ TEST_COMPARE (ans1p[3] & 0x0f, 0); -+ TEST_COMPARE (ans2p[3] & 0x0f, 0); -+ -+ /* Due to bug 19691, the answer may not be in the slot matching the -+ query. 
Assume that the AAAA response is the longer one. */ -+ unsigned char *a_answer; -+ int a_answer_length; -+ unsigned char *aaaa_answer; -+ int aaaa_answer_length; -+ if (resplen2 > resplen1) -+ { -+ a_answer = ans1p; -+ a_answer_length = resplen1; -+ aaaa_answer = ans2p; -+ aaaa_answer_length = resplen2; -+ } -+ else -+ { -+ a_answer = ans2p; -+ a_answer_length = resplen2; -+ aaaa_answer = ans1p; -+ aaaa_answer_length = resplen1; -+ } -+ -+ { -+ char *expected = xasprintf ("name: %s\n" -+ "address: 192.0.2.1\n", -+ qname); -+ check_dns_packet (qname, a_answer, a_answer_length, expected); -+ free (expected); -+ } -+ { -+ char *expected = xasprintf ("name: %s\n" -+ "address: 2001:db8::1\n", -+ qname); -+ check_dns_packet (qname, aaaa_answer, aaaa_answer_length, expected); -+ free (expected); -+ } -+ -+ if (ans2p_malloced) -+ free (ans2p); -+ -+ __resolv_context_put (ctx); -+} -+ -+static int -+do_test (void) -+{ -+ struct resolv_test *aux = resolv_test_start -+ ((struct resolv_redirect_config) -+ { -+ .response_callback = response, -+ -+ /* The response callback use global state (the previous_* -+ variables), and query processing must therefore be -+ serialized. 
*/ -+ .single_thread_udp = true, -+ }); -+ -+ for (int rcode = 0; rcode <= 5; ++rcode) -+ for (int do_reorder_0 = 0; do_reorder_0 < 2; ++do_reorder_0) -+ for (int do_reorder_1 = 0; do_reorder_1 < 2; ++do_reorder_1) -+ { -+ char *qname = xasprintf ("reorder-%d-%d.rcode-%d.example.net", -+ do_reorder_0, do_reorder_1, rcode); -+ test_qname (qname, rcode); -+ free (qname); -+ } -+ -+ resolv_test_end (aux); -+ -+ return 0; -+} -+ -+#include <support/test-driver.c> -diff --git a/rt/Makefile b/rt/Makefile -index dab5d62a57..93502cfaa7 100644 ---- a/rt/Makefile -+++ b/rt/Makefile -@@ -44,6 +44,7 @@ tests := tst-shm tst-timer tst-timer2 \ - tst-aio7 tst-aio8 tst-aio9 tst-aio10 \ - tst-mqueue1 tst-mqueue2 tst-mqueue3 tst-mqueue4 \ - tst-mqueue5 tst-mqueue6 tst-mqueue7 tst-mqueue8 tst-mqueue9 \ -+ tst-bz28213 \ - tst-timer3 tst-timer4 tst-timer5 \ - tst-cpuclock2 tst-cputimer1 tst-cputimer2 tst-cputimer3 \ - tst-shm-cancel -diff --git a/rt/tst-bz28213.c b/rt/tst-bz28213.c -new file mode 100644 -index 0000000000..0c096b5a0a ---- /dev/null -+++ b/rt/tst-bz28213.c -@@ -0,0 +1,101 @@ -+/* Bug 28213: test for NULL pointer dereference in mq_notify. -+ Copyright (C) The GNU Toolchain Authors. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ <https://www.gnu.org/licenses/>. 
*/ -+ -+#include <errno.h> -+#include <sys/types.h> -+#include <sys/stat.h> -+#include <fcntl.h> -+#include <unistd.h> -+#include <mqueue.h> -+#include <signal.h> -+#include <stdlib.h> -+#include <string.h> -+#include <support/check.h> -+ -+static mqd_t m = -1; -+static const char msg[] = "hello"; -+ -+static void -+check_bz28213_cb (union sigval sv) -+{ -+ char buf[sizeof (msg)]; -+ -+ (void) sv; -+ -+ TEST_VERIFY_EXIT ((size_t) mq_receive (m, buf, sizeof (buf), NULL) -+ == sizeof (buf)); -+ TEST_VERIFY_EXIT (memcmp (buf, msg, sizeof (buf)) == 0); -+ -+ exit (0); -+} -+ -+static void -+check_bz28213 (void) -+{ -+ struct sigevent sev; -+ -+ memset (&sev, '\0', sizeof (sev)); -+ sev.sigev_notify = SIGEV_THREAD; -+ sev.sigev_notify_function = check_bz28213_cb; -+ -+ /* Step 1: Register & unregister notifier. -+ Helper thread should receive NOTIFY_REMOVED notification. -+ In a vulnerable version of glibc, NULL pointer dereference follows. */ -+ TEST_VERIFY_EXIT (mq_notify (m, &sev) == 0); -+ TEST_VERIFY_EXIT (mq_notify (m, NULL) == 0); -+ -+ /* Step 2: Once again, register notification. -+ Try to send one message. -+ Test is considered successful, if the callback does exit (0). */ -+ TEST_VERIFY_EXIT (mq_notify (m, &sev) == 0); -+ TEST_VERIFY_EXIT (mq_send (m, msg, sizeof (msg), 1) == 0); -+ -+ /* Wait... 
*/ -+ pause (); -+} -+ -+static int -+do_test (void) -+{ -+ static const char m_name[] = "/bz28213_queue"; -+ struct mq_attr m_attr; -+ -+ memset (&m_attr, '\0', sizeof (m_attr)); -+ m_attr.mq_maxmsg = 1; -+ m_attr.mq_msgsize = sizeof (msg); -+ -+ m = mq_open (m_name, -+ O_RDWR | O_CREAT | O_EXCL, -+ 0600, -+ &m_attr); -+ -+ if (m < 0) -+ { -+ if (errno == ENOSYS) -+ FAIL_UNSUPPORTED ("POSIX message queues are not implemented\n"); -+ FAIL_EXIT1 ("Failed to create POSIX message queue: %m\n"); -+ } -+ -+ TEST_VERIFY_EXIT (mq_unlink (m_name) == 0); -+ -+ check_bz28213 (); -+ -+ return 0; -+} -+ -+#include <support/test-driver.c> -diff --git a/stdio-common/Makefile b/stdio-common/Makefile -index 8475fd1f09..eff0c98d82 100644 ---- a/stdio-common/Makefile -+++ b/stdio-common/Makefile -@@ -69,7 +69,8 @@ tests := tstscanf test_rdwr test-popen tstgetln test-fseek \ - tst-printf-bz25691 \ - tst-vfprintf-width-prec-alloc \ - tst-printf-fp-free \ -- tst-printf-fp-leak -+ tst-printf-fp-leak \ -+ test-strerr - - - test-srcs = tst-unbputc tst-printf tst-printfsz-islongdouble -diff --git a/stdio-common/errlist.c b/stdio-common/errlist.c -index d15f13a22a..2ecf121674 100644 ---- a/stdio-common/errlist.c -+++ b/stdio-common/errlist.c -@@ -20,9 +20,13 @@ - #include <libintl.h> - #include <array_length.h> - -+#ifndef ERR_MAP -+# define ERR_MAP(n) n -+#endif -+ - const char *const _sys_errlist_internal[] = - { --#define _S(n, str) [n] = str, -+#define _S(n, str) [ERR_MAP(n)] = str, - #include <errlist.h> - #undef _S - }; -@@ -41,20 +45,21 @@ static const union sys_errname_t - { - #define MSGSTRFIELD1(line) str##line - #define MSGSTRFIELD(line) MSGSTRFIELD1(line) --#define _S(n, str) char MSGSTRFIELD(__LINE__)[sizeof(str)]; -+#define _S(n, str) char MSGSTRFIELD(__LINE__)[sizeof(#n)]; - #include <errlist.h> - #undef _S - }; - char str[0]; - } _sys_errname = { { --#define _S(n, s) s, -+#define _S(n, s) #n, - #include <errlist.h> - #undef _S - } }; - - static const unsigned short 
_sys_errnameidx[] = - { --#define _S(n, s) [n] = offsetof(union sys_errname_t, MSGSTRFIELD(__LINE__)), -+#define _S(n, s) \ -+ [ERR_MAP(n)] = offsetof(union sys_errname_t, MSGSTRFIELD(__LINE__)), - #include <errlist.h> - #undef _S - }; -diff --git a/stdio-common/test-strerr.c b/stdio-common/test-strerr.c -index fded208118..d77b81d507 100644 ---- a/stdio-common/test-strerr.c -+++ b/stdio-common/test-strerr.c -@@ -18,46 +18,672 @@ - - #include <string.h> - #include <errno.h> --#include <array_length.h> - - #include <support/support.h> - #include <support/check.h> - --#define N_(name) name -- --static const char *const errlist[] = -- { --/* This file is auto-generated from errlist.def. */ --#include <errlist.h> -- }; -- --#define MSGSTR_T errname_t --#define MSGSTR errname --#define MSGIDX errnameidx --#include <errlist-name.h> --#undef MSGSTR --#undef MSGIDX -- - static int - do_test (void) - { -- TEST_VERIFY (strerrordesc_np (-1) == NULL); -- TEST_VERIFY (strerrordesc_np (array_length (errlist)) == NULL); -- for (size_t i = 0; i < array_length (errlist); i++) -- { -- if (errlist[i] == NULL) -- continue; -- TEST_COMPARE_STRING (strerrordesc_np (i), errlist[i]); -- } -+ TEST_COMPARE_STRING (strerrordesc_np (0), "Success"); -+ TEST_COMPARE_STRING (strerrorname_np (0), "0"); - -- TEST_VERIFY (strerrorname_np (-1) == NULL); -- TEST_VERIFY (strerrorname_np (array_length (errlist)) == NULL); -- for (size_t i = 0; i < array_length (errlist); i++) -- { -- if (errlist[i] == NULL) -- continue; -- TEST_COMPARE_STRING (strerrorname_np (i), errname.str + errnameidx[i]); -- } -+#ifdef EPERM -+ TEST_COMPARE_STRING (strerrordesc_np (EPERM), "Operation not permitted"); -+ TEST_COMPARE_STRING (strerrorname_np (EPERM), "EPERM"); -+#endif -+#ifdef ENOENT -+ TEST_COMPARE_STRING (strerrordesc_np (ENOENT), -+ "No such file or directory"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOENT), "ENOENT"); -+#endif -+#ifdef ESRCH -+ TEST_COMPARE_STRING (strerrordesc_np (ESRCH), "No such process"); 
-+ TEST_COMPARE_STRING (strerrorname_np (ESRCH), "ESRCH"); -+#endif -+#ifdef EINTR -+ TEST_COMPARE_STRING (strerrordesc_np (EINTR), "Interrupted system call"); -+ TEST_COMPARE_STRING (strerrorname_np (EINTR), "EINTR"); -+#endif -+#ifdef EIO -+ TEST_COMPARE_STRING (strerrordesc_np (EIO), "Input/output error"); -+ TEST_COMPARE_STRING (strerrorname_np (EIO), "EIO"); -+#endif -+#ifdef ENXIO -+ TEST_COMPARE_STRING (strerrordesc_np (ENXIO), "No such device or address"); -+ TEST_COMPARE_STRING (strerrorname_np (ENXIO), "ENXIO"); -+#endif -+#ifdef E2BIG -+ TEST_COMPARE_STRING (strerrordesc_np (E2BIG), "Argument list too long"); -+ TEST_COMPARE_STRING (strerrorname_np (E2BIG), "E2BIG"); -+#endif -+#ifdef ENOEXEC -+ TEST_COMPARE_STRING (strerrordesc_np (ENOEXEC), "Exec format error"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOEXEC), "ENOEXEC"); -+#endif -+#ifdef EBADF -+ TEST_COMPARE_STRING (strerrordesc_np (EBADF), "Bad file descriptor"); -+ TEST_COMPARE_STRING (strerrorname_np (EBADF), "EBADF"); -+#endif -+#ifdef ECHILD -+ TEST_COMPARE_STRING (strerrordesc_np (ECHILD), "No child processes"); -+ TEST_COMPARE_STRING (strerrorname_np (ECHILD), "ECHILD"); -+#endif -+#ifdef EDEADLK -+ TEST_COMPARE_STRING (strerrordesc_np (EDEADLK), -+ "Resource deadlock avoided"); -+ TEST_COMPARE_STRING (strerrorname_np (EDEADLK), "EDEADLK"); -+#endif -+#ifdef ENOMEM -+ TEST_COMPARE_STRING (strerrordesc_np (ENOMEM), "Cannot allocate memory"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOMEM), "ENOMEM"); -+#endif -+#ifdef EACCES -+ TEST_COMPARE_STRING (strerrordesc_np (EACCES), "Permission denied"); -+ TEST_COMPARE_STRING (strerrorname_np (EACCES), "EACCES"); -+#endif -+#ifdef EFAULT -+ TEST_COMPARE_STRING (strerrordesc_np (EFAULT), "Bad address"); -+ TEST_COMPARE_STRING (strerrorname_np (EFAULT), "EFAULT"); -+#endif -+#ifdef ENOTBLK -+ TEST_COMPARE_STRING (strerrordesc_np (ENOTBLK), "Block device required"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOTBLK), "ENOTBLK"); -+#endif -+#ifdef EBUSY 
-+ TEST_COMPARE_STRING (strerrordesc_np (EBUSY), "Device or resource busy"); -+ TEST_COMPARE_STRING (strerrorname_np (EBUSY), "EBUSY"); -+#endif -+#ifdef EEXIST -+ TEST_COMPARE_STRING (strerrordesc_np (EEXIST), "File exists"); -+ TEST_COMPARE_STRING (strerrorname_np (EEXIST), "EEXIST"); -+#endif -+#ifdef EXDEV -+ TEST_COMPARE_STRING (strerrordesc_np (EXDEV), "Invalid cross-device link"); -+ TEST_COMPARE_STRING (strerrorname_np (EXDEV), "EXDEV"); -+#endif -+#ifdef ENODEV -+ TEST_COMPARE_STRING (strerrordesc_np (ENODEV), "No such device"); -+ TEST_COMPARE_STRING (strerrorname_np (ENODEV), "ENODEV"); -+#endif -+#ifdef ENOTDIR -+ TEST_COMPARE_STRING (strerrordesc_np (ENOTDIR), "Not a directory"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOTDIR), "ENOTDIR"); -+#endif -+#ifdef EISDIR -+ TEST_COMPARE_STRING (strerrordesc_np (EISDIR), "Is a directory"); -+ TEST_COMPARE_STRING (strerrorname_np (EISDIR), "EISDIR"); -+#endif -+#ifdef EINVAL -+ TEST_COMPARE_STRING (strerrordesc_np (EINVAL), "Invalid argument"); -+ TEST_COMPARE_STRING (strerrorname_np (EINVAL), "EINVAL"); -+#endif -+#ifdef EMFILE -+ TEST_COMPARE_STRING (strerrordesc_np (EMFILE), "Too many open files"); -+ TEST_COMPARE_STRING (strerrorname_np (EMFILE), "EMFILE"); -+#endif -+#ifdef ENFILE -+ TEST_COMPARE_STRING (strerrordesc_np (ENFILE), -+ "Too many open files in system"); -+ TEST_COMPARE_STRING (strerrorname_np (ENFILE), "ENFILE"); -+#endif -+#ifdef ENOTTY -+ TEST_COMPARE_STRING (strerrordesc_np (ENOTTY), -+ "Inappropriate ioctl for device"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOTTY), "ENOTTY"); -+#endif -+#ifdef ETXTBSY -+ TEST_COMPARE_STRING (strerrordesc_np (ETXTBSY), "Text file busy"); -+ TEST_COMPARE_STRING (strerrorname_np (ETXTBSY), "ETXTBSY"); -+#endif -+#ifdef EFBIG -+ TEST_COMPARE_STRING (strerrordesc_np (EFBIG), "File too large"); -+ TEST_COMPARE_STRING (strerrorname_np (EFBIG), "EFBIG"); -+#endif -+#ifdef ENOSPC -+ TEST_COMPARE_STRING (strerrordesc_np (ENOSPC), "No space left on device"); -+ 
TEST_COMPARE_STRING (strerrorname_np (ENOSPC), "ENOSPC"); -+#endif -+#ifdef ESPIPE -+ TEST_COMPARE_STRING (strerrordesc_np (ESPIPE), "Illegal seek"); -+ TEST_COMPARE_STRING (strerrorname_np (ESPIPE), "ESPIPE"); -+#endif -+#ifdef EROFS -+ TEST_COMPARE_STRING (strerrordesc_np (EROFS), "Read-only file system"); -+ TEST_COMPARE_STRING (strerrorname_np (EROFS), "EROFS"); -+#endif -+#ifdef EMLINK -+ TEST_COMPARE_STRING (strerrordesc_np (EMLINK), "Too many links"); -+ TEST_COMPARE_STRING (strerrorname_np (EMLINK), "EMLINK"); -+#endif -+#ifdef EPIPE -+ TEST_COMPARE_STRING (strerrordesc_np (EPIPE), "Broken pipe"); -+ TEST_COMPARE_STRING (strerrorname_np (EPIPE), "EPIPE"); -+#endif -+#ifdef EDOM -+ TEST_COMPARE_STRING (strerrordesc_np (EDOM), -+ "Numerical argument out of domain"); -+ TEST_COMPARE_STRING (strerrorname_np (EDOM), "EDOM"); -+#endif -+#ifdef ERANGE -+ TEST_COMPARE_STRING (strerrordesc_np (ERANGE), -+ "Numerical result out of range"); -+ TEST_COMPARE_STRING (strerrorname_np (ERANGE), "ERANGE"); -+#endif -+#ifdef EAGAIN -+ TEST_COMPARE_STRING (strerrordesc_np (EAGAIN), -+ "Resource temporarily unavailable"); -+ TEST_COMPARE_STRING (strerrorname_np (EAGAIN), "EAGAIN"); -+#endif -+#ifdef EINPROGRESS -+ TEST_COMPARE_STRING (strerrordesc_np (EINPROGRESS), -+ "Operation now in progress"); -+ TEST_COMPARE_STRING (strerrorname_np (EINPROGRESS), "EINPROGRESS"); -+#endif -+#ifdef EALREADY -+ TEST_COMPARE_STRING (strerrordesc_np (EALREADY), -+ "Operation already in progress"); -+ TEST_COMPARE_STRING (strerrorname_np (EALREADY), "EALREADY"); -+#endif -+#ifdef ENOTSOCK -+ TEST_COMPARE_STRING (strerrordesc_np (ENOTSOCK), -+ "Socket operation on non-socket"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOTSOCK), "ENOTSOCK"); -+#endif -+#ifdef EMSGSIZE -+ TEST_COMPARE_STRING (strerrordesc_np (EMSGSIZE), "Message too long"); -+ TEST_COMPARE_STRING (strerrorname_np (EMSGSIZE), "EMSGSIZE"); -+#endif -+#ifdef EPROTOTYPE -+ TEST_COMPARE_STRING (strerrordesc_np (EPROTOTYPE), -+ 
"Protocol wrong type for socket"); -+ TEST_COMPARE_STRING (strerrorname_np (EPROTOTYPE), "EPROTOTYPE"); -+#endif -+#ifdef ENOPROTOOPT -+ TEST_COMPARE_STRING (strerrordesc_np (ENOPROTOOPT), -+ "Protocol not available"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOPROTOOPT), "ENOPROTOOPT"); -+#endif -+#ifdef EPROTONOSUPPORT -+ TEST_COMPARE_STRING (strerrordesc_np (EPROTONOSUPPORT), -+ "Protocol not supported"); -+ TEST_COMPARE_STRING (strerrorname_np (EPROTONOSUPPORT), "EPROTONOSUPPORT"); -+#endif -+#ifdef ESOCKTNOSUPPORT -+ TEST_COMPARE_STRING (strerrordesc_np (ESOCKTNOSUPPORT), -+ "Socket type not supported"); -+ TEST_COMPARE_STRING (strerrorname_np (ESOCKTNOSUPPORT), "ESOCKTNOSUPPORT"); -+#endif -+#ifdef EOPNOTSUPP -+ TEST_COMPARE_STRING (strerrordesc_np (EOPNOTSUPP), -+ "Operation not supported"); -+ TEST_COMPARE_STRING (strerrorname_np (EOPNOTSUPP), "EOPNOTSUPP"); -+#endif -+#ifdef EPFNOSUPPORT -+ TEST_COMPARE_STRING (strerrordesc_np (EPFNOSUPPORT), -+ "Protocol family not supported"); -+ TEST_COMPARE_STRING (strerrorname_np (EPFNOSUPPORT), "EPFNOSUPPORT"); -+#endif -+#ifdef EAFNOSUPPORT -+ TEST_COMPARE_STRING (strerrordesc_np (EAFNOSUPPORT), -+ "Address family not supported by protocol"); -+ TEST_COMPARE_STRING (strerrorname_np (EAFNOSUPPORT), "EAFNOSUPPORT"); -+#endif -+#ifdef EADDRINUSE -+ TEST_COMPARE_STRING (strerrordesc_np (EADDRINUSE), -+ "Address already in use"); -+ TEST_COMPARE_STRING (strerrorname_np (EADDRINUSE), "EADDRINUSE"); -+#endif -+#ifdef EADDRNOTAVAIL -+ TEST_COMPARE_STRING (strerrordesc_np (EADDRNOTAVAIL), -+ "Cannot assign requested address"); -+ TEST_COMPARE_STRING (strerrorname_np (EADDRNOTAVAIL), "EADDRNOTAVAIL"); -+#endif -+#ifdef ENETDOWN -+ TEST_COMPARE_STRING (strerrordesc_np (ENETDOWN), "Network is down"); -+ TEST_COMPARE_STRING (strerrorname_np (ENETDOWN), "ENETDOWN"); -+#endif -+#ifdef ENETUNREACH -+ TEST_COMPARE_STRING (strerrordesc_np (ENETUNREACH), -+ "Network is unreachable"); -+ TEST_COMPARE_STRING (strerrorname_np 
(ENETUNREACH), "ENETUNREACH"); -+#endif -+#ifdef ENETRESET -+ TEST_COMPARE_STRING (strerrordesc_np (ENETRESET), -+ "Network dropped connection on reset"); -+ TEST_COMPARE_STRING (strerrorname_np (ENETRESET), "ENETRESET"); -+#endif -+#ifdef ECONNABORTED -+ TEST_COMPARE_STRING (strerrordesc_np (ECONNABORTED), -+ "Software caused connection abort"); -+ TEST_COMPARE_STRING (strerrorname_np (ECONNABORTED), "ECONNABORTED"); -+#endif -+#ifdef ECONNRESET -+ TEST_COMPARE_STRING (strerrordesc_np (ECONNRESET), -+ "Connection reset by peer"); -+ TEST_COMPARE_STRING (strerrorname_np (ECONNRESET), "ECONNRESET"); -+#endif -+#ifdef ENOBUFS -+ TEST_COMPARE_STRING (strerrordesc_np (ENOBUFS), -+ "No buffer space available"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOBUFS), "ENOBUFS"); -+#endif -+#ifdef EISCONN -+ TEST_COMPARE_STRING (strerrordesc_np (EISCONN), -+ "Transport endpoint is already connected"); -+ TEST_COMPARE_STRING (strerrorname_np (EISCONN), "EISCONN"); -+#endif -+#ifdef ENOTCONN -+ TEST_COMPARE_STRING (strerrordesc_np (ENOTCONN), -+ "Transport endpoint is not connected"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOTCONN), "ENOTCONN"); -+#endif -+#ifdef EDESTADDRREQ -+ TEST_COMPARE_STRING (strerrordesc_np (EDESTADDRREQ), -+ "Destination address required"); -+ TEST_COMPARE_STRING (strerrorname_np (EDESTADDRREQ), "EDESTADDRREQ"); -+#endif -+#ifdef ESHUTDOWN -+ TEST_COMPARE_STRING (strerrordesc_np (ESHUTDOWN), -+ "Cannot send after transport endpoint shutdown"); -+ TEST_COMPARE_STRING (strerrorname_np (ESHUTDOWN), "ESHUTDOWN"); -+#endif -+#ifdef ETOOMANYREFS -+ TEST_COMPARE_STRING (strerrordesc_np (ETOOMANYREFS), -+ "Too many references: cannot splice"); -+ TEST_COMPARE_STRING (strerrorname_np (ETOOMANYREFS), "ETOOMANYREFS"); -+#endif -+#ifdef ETIMEDOUT -+ TEST_COMPARE_STRING (strerrordesc_np (ETIMEDOUT), "Connection timed out"); -+ TEST_COMPARE_STRING (strerrorname_np (ETIMEDOUT), "ETIMEDOUT"); -+#endif -+#ifdef ECONNREFUSED -+ TEST_COMPARE_STRING (strerrordesc_np 
(ECONNREFUSED), "Connection refused"); -+ TEST_COMPARE_STRING (strerrorname_np (ECONNREFUSED), "ECONNREFUSED"); -+#endif -+#ifdef ELOOP -+ TEST_COMPARE_STRING (strerrordesc_np (ELOOP), -+ "Too many levels of symbolic links"); -+ TEST_COMPARE_STRING (strerrorname_np (ELOOP), "ELOOP"); -+#endif -+#ifdef ENAMETOOLONG -+ TEST_COMPARE_STRING (strerrordesc_np (ENAMETOOLONG), "File name too long"); -+ TEST_COMPARE_STRING (strerrorname_np (ENAMETOOLONG), "ENAMETOOLONG"); -+#endif -+#ifdef EHOSTDOWN -+ TEST_COMPARE_STRING (strerrordesc_np (EHOSTDOWN), "Host is down"); -+ TEST_COMPARE_STRING (strerrorname_np (EHOSTDOWN), "EHOSTDOWN"); -+#endif -+#ifdef EHOSTUNREACH -+ TEST_COMPARE_STRING (strerrordesc_np (EHOSTUNREACH), "No route to host"); -+ TEST_COMPARE_STRING (strerrorname_np (EHOSTUNREACH), "EHOSTUNREACH"); -+#endif -+#ifdef ENOTEMPTY -+ TEST_COMPARE_STRING (strerrordesc_np (ENOTEMPTY), "Directory not empty"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOTEMPTY), "ENOTEMPTY"); -+#endif -+#ifdef EUSERS -+ TEST_COMPARE_STRING (strerrordesc_np (EUSERS), "Too many users"); -+ TEST_COMPARE_STRING (strerrorname_np (EUSERS), "EUSERS"); -+#endif -+#ifdef EDQUOT -+ TEST_COMPARE_STRING (strerrordesc_np (EDQUOT), "Disk quota exceeded"); -+ TEST_COMPARE_STRING (strerrorname_np (EDQUOT), "EDQUOT"); -+#endif -+#ifdef ESTALE -+ TEST_COMPARE_STRING (strerrordesc_np (ESTALE), "Stale file handle"); -+ TEST_COMPARE_STRING (strerrorname_np (ESTALE), "ESTALE"); -+#endif -+#ifdef EREMOTE -+ TEST_COMPARE_STRING (strerrordesc_np (EREMOTE), "Object is remote"); -+ TEST_COMPARE_STRING (strerrorname_np (EREMOTE), "EREMOTE"); -+#endif -+#ifdef ENOLCK -+ TEST_COMPARE_STRING (strerrordesc_np (ENOLCK), "No locks available"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOLCK), "ENOLCK"); -+#endif -+#ifdef ENOSYS -+ TEST_COMPARE_STRING (strerrordesc_np (ENOSYS), "Function not implemented"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOSYS), "ENOSYS"); -+#endif -+#ifdef EILSEQ -+ TEST_COMPARE_STRING 
(strerrordesc_np (EILSEQ), -+ "Invalid or incomplete multibyte or wide character"); -+ TEST_COMPARE_STRING (strerrorname_np (EILSEQ), "EILSEQ"); -+#endif -+#ifdef EBADMSG -+ TEST_COMPARE_STRING (strerrordesc_np (EBADMSG), "Bad message"); -+ TEST_COMPARE_STRING (strerrorname_np (EBADMSG), "EBADMSG"); -+#endif -+#ifdef EIDRM -+ TEST_COMPARE_STRING (strerrordesc_np (EIDRM), "Identifier removed"); -+ TEST_COMPARE_STRING (strerrorname_np (EIDRM), "EIDRM"); -+#endif -+#ifdef EMULTIHOP -+ TEST_COMPARE_STRING (strerrordesc_np (EMULTIHOP), "Multihop attempted"); -+ TEST_COMPARE_STRING (strerrorname_np (EMULTIHOP), "EMULTIHOP"); -+#endif -+#ifdef ENODATA -+ TEST_COMPARE_STRING (strerrordesc_np (ENODATA), "No data available"); -+ TEST_COMPARE_STRING (strerrorname_np (ENODATA), "ENODATA"); -+#endif -+#ifdef ENOLINK -+ TEST_COMPARE_STRING (strerrordesc_np (ENOLINK), "Link has been severed"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOLINK), "ENOLINK"); -+#endif -+#ifdef ENOMSG -+ TEST_COMPARE_STRING (strerrordesc_np (ENOMSG), -+ "No message of desired type"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOMSG), "ENOMSG"); -+#endif -+#ifdef ENOSR -+ TEST_COMPARE_STRING (strerrordesc_np (ENOSR), "Out of streams resources"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOSR), "ENOSR"); -+#endif -+#ifdef ENOSTR -+ TEST_COMPARE_STRING (strerrordesc_np (ENOSTR), "Device not a stream"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOSTR), "ENOSTR"); -+#endif -+#ifdef EOVERFLOW -+ TEST_COMPARE_STRING (strerrordesc_np (EOVERFLOW), -+ "Value too large for defined data type"); -+ TEST_COMPARE_STRING (strerrorname_np (EOVERFLOW), "EOVERFLOW"); -+#endif -+#ifdef EPROTO -+ TEST_COMPARE_STRING (strerrordesc_np (EPROTO), "Protocol error"); -+ TEST_COMPARE_STRING (strerrorname_np (EPROTO), "EPROTO"); -+#endif -+#ifdef ETIME -+ TEST_COMPARE_STRING (strerrordesc_np (ETIME), "Timer expired"); -+ TEST_COMPARE_STRING (strerrorname_np (ETIME), "ETIME"); -+#endif -+#ifdef ECANCELED -+ TEST_COMPARE_STRING 
(strerrordesc_np (ECANCELED), "Operation canceled"); -+ TEST_COMPARE_STRING (strerrorname_np (ECANCELED), "ECANCELED"); -+#endif -+#ifdef EOWNERDEAD -+ TEST_COMPARE_STRING (strerrordesc_np (EOWNERDEAD), "Owner died"); -+ TEST_COMPARE_STRING (strerrorname_np (EOWNERDEAD), "EOWNERDEAD"); -+#endif -+#ifdef ENOTRECOVERABLE -+ TEST_COMPARE_STRING (strerrordesc_np (ENOTRECOVERABLE), -+ "State not recoverable"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOTRECOVERABLE), "ENOTRECOVERABLE"); -+#endif -+#ifdef ERESTART -+ TEST_COMPARE_STRING (strerrordesc_np (ERESTART), -+ "Interrupted system call should be restarted"); -+ TEST_COMPARE_STRING (strerrorname_np (ERESTART), "ERESTART"); -+#endif -+#ifdef ECHRNG -+ TEST_COMPARE_STRING (strerrordesc_np (ECHRNG), -+ "Channel number out of range"); -+ TEST_COMPARE_STRING (strerrorname_np (ECHRNG), "ECHRNG"); -+#endif -+#ifdef EL2NSYNC -+ TEST_COMPARE_STRING (strerrordesc_np (EL2NSYNC), -+ "Level 2 not synchronized"); -+ TEST_COMPARE_STRING (strerrorname_np (EL2NSYNC), "EL2NSYNC"); -+#endif -+#ifdef EL3HLT -+ TEST_COMPARE_STRING (strerrordesc_np (EL3HLT), "Level 3 halted"); -+ TEST_COMPARE_STRING (strerrorname_np (EL3HLT), "EL3HLT"); -+#endif -+#ifdef EL3RST -+ TEST_COMPARE_STRING (strerrordesc_np (EL3RST), "Level 3 reset"); -+ TEST_COMPARE_STRING (strerrorname_np (EL3RST), "EL3RST"); -+#endif -+#ifdef ELNRNG -+ TEST_COMPARE_STRING (strerrordesc_np (ELNRNG), "Link number out of range"); -+ TEST_COMPARE_STRING (strerrorname_np (ELNRNG), "ELNRNG"); -+#endif -+#ifdef EUNATCH -+ TEST_COMPARE_STRING (strerrordesc_np (EUNATCH), -+ "Protocol driver not attached"); -+ TEST_COMPARE_STRING (strerrorname_np (EUNATCH), "EUNATCH"); -+#endif -+#ifdef ENOCSI -+ TEST_COMPARE_STRING (strerrordesc_np (ENOCSI), -+ "No CSI structure available"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOCSI), "ENOCSI"); -+#endif -+#ifdef EL2HLT -+ TEST_COMPARE_STRING (strerrordesc_np (EL2HLT), "Level 2 halted"); -+ TEST_COMPARE_STRING (strerrorname_np (EL2HLT), 
"EL2HLT"); -+#endif -+#ifdef EBADE -+ TEST_COMPARE_STRING (strerrordesc_np (EBADE), "Invalid exchange"); -+ TEST_COMPARE_STRING (strerrorname_np (EBADE), "EBADE"); -+#endif -+#ifdef EBADR -+ TEST_COMPARE_STRING (strerrordesc_np (EBADR), -+ "Invalid request descriptor"); -+ TEST_COMPARE_STRING (strerrorname_np (EBADR), "EBADR"); -+#endif -+#ifdef EXFULL -+ TEST_COMPARE_STRING (strerrordesc_np (EXFULL), "Exchange full"); -+ TEST_COMPARE_STRING (strerrorname_np (EXFULL), "EXFULL"); -+#endif -+#ifdef ENOANO -+ TEST_COMPARE_STRING (strerrordesc_np (ENOANO), "No anode"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOANO), "ENOANO"); -+#endif -+#ifdef EBADRQC -+ TEST_COMPARE_STRING (strerrordesc_np (EBADRQC), "Invalid request code"); -+ TEST_COMPARE_STRING (strerrorname_np (EBADRQC), "EBADRQC"); -+#endif -+#ifdef EBADSLT -+ TEST_COMPARE_STRING (strerrordesc_np (EBADSLT), "Invalid slot"); -+ TEST_COMPARE_STRING (strerrorname_np (EBADSLT), "EBADSLT"); -+#endif -+#ifdef EBFONT -+ TEST_COMPARE_STRING (strerrordesc_np (EBFONT), "Bad font file format"); -+ TEST_COMPARE_STRING (strerrorname_np (EBFONT), "EBFONT"); -+#endif -+#ifdef ENONET -+ TEST_COMPARE_STRING (strerrordesc_np (ENONET), -+ "Machine is not on the network"); -+ TEST_COMPARE_STRING (strerrorname_np (ENONET), "ENONET"); -+#endif -+#ifdef ENOPKG -+ TEST_COMPARE_STRING (strerrordesc_np (ENOPKG), "Package not installed"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOPKG), "ENOPKG"); -+#endif -+#ifdef EADV -+ TEST_COMPARE_STRING (strerrordesc_np (EADV), "Advertise error"); -+ TEST_COMPARE_STRING (strerrorname_np (EADV), "EADV"); -+#endif -+#ifdef ESRMNT -+ TEST_COMPARE_STRING (strerrordesc_np (ESRMNT), "Srmount error"); -+ TEST_COMPARE_STRING (strerrorname_np (ESRMNT), "ESRMNT"); -+#endif -+#ifdef ECOMM -+ TEST_COMPARE_STRING (strerrordesc_np (ECOMM), -+ "Communication error on send"); -+ TEST_COMPARE_STRING (strerrorname_np (ECOMM), "ECOMM"); -+#endif -+#ifdef EDOTDOT -+ TEST_COMPARE_STRING (strerrordesc_np (EDOTDOT), 
"RFS specific error"); -+ TEST_COMPARE_STRING (strerrorname_np (EDOTDOT), "EDOTDOT"); -+#endif -+#ifdef ENOTUNIQ -+ TEST_COMPARE_STRING (strerrordesc_np (ENOTUNIQ), -+ "Name not unique on network"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOTUNIQ), "ENOTUNIQ"); -+#endif -+#ifdef EBADFD -+ TEST_COMPARE_STRING (strerrordesc_np (EBADFD), -+ "File descriptor in bad state"); -+ TEST_COMPARE_STRING (strerrorname_np (EBADFD), "EBADFD"); -+#endif -+#ifdef EREMCHG -+ TEST_COMPARE_STRING (strerrordesc_np (EREMCHG), "Remote address changed"); -+ TEST_COMPARE_STRING (strerrorname_np (EREMCHG), "EREMCHG"); -+#endif -+#ifdef ELIBACC -+ TEST_COMPARE_STRING (strerrordesc_np (ELIBACC), -+ "Can not access a needed shared library"); -+ TEST_COMPARE_STRING (strerrorname_np (ELIBACC), "ELIBACC"); -+#endif -+#ifdef ELIBBAD -+ TEST_COMPARE_STRING (strerrordesc_np (ELIBBAD), -+ "Accessing a corrupted shared library"); -+ TEST_COMPARE_STRING (strerrorname_np (ELIBBAD), "ELIBBAD"); -+#endif -+#ifdef ELIBSCN -+ TEST_COMPARE_STRING (strerrordesc_np (ELIBSCN), -+ ".lib section in a.out corrupted"); -+ TEST_COMPARE_STRING (strerrorname_np (ELIBSCN), "ELIBSCN"); -+#endif -+#ifdef ELIBMAX -+ TEST_COMPARE_STRING (strerrordesc_np (ELIBMAX), -+ "Attempting to link in too many shared libraries"); -+ TEST_COMPARE_STRING (strerrorname_np (ELIBMAX), "ELIBMAX"); -+#endif -+#ifdef ELIBEXEC -+ TEST_COMPARE_STRING (strerrordesc_np (ELIBEXEC), -+ "Cannot exec a shared library directly"); -+ TEST_COMPARE_STRING (strerrorname_np (ELIBEXEC), "ELIBEXEC"); -+#endif -+#ifdef ESTRPIPE -+ TEST_COMPARE_STRING (strerrordesc_np (ESTRPIPE), "Streams pipe error"); -+ TEST_COMPARE_STRING (strerrorname_np (ESTRPIPE), "ESTRPIPE"); -+#endif -+#ifdef EUCLEAN -+ TEST_COMPARE_STRING (strerrordesc_np (EUCLEAN), -+ "Structure needs cleaning"); -+ TEST_COMPARE_STRING (strerrorname_np (EUCLEAN), "EUCLEAN"); -+#endif -+#ifdef ENOTNAM -+ TEST_COMPARE_STRING (strerrordesc_np (ENOTNAM), -+ "Not a XENIX named type file"); -+ 
TEST_COMPARE_STRING (strerrorname_np (ENOTNAM), "ENOTNAM"); -+#endif -+#ifdef ENAVAIL -+ TEST_COMPARE_STRING (strerrordesc_np (ENAVAIL), -+ "No XENIX semaphores available"); -+ TEST_COMPARE_STRING (strerrorname_np (ENAVAIL), "ENAVAIL"); -+#endif -+#ifdef EISNAM -+ TEST_COMPARE_STRING (strerrordesc_np (EISNAM), "Is a named type file"); -+ TEST_COMPARE_STRING (strerrorname_np (EISNAM), "EISNAM"); -+#endif -+#ifdef EREMOTEIO -+ TEST_COMPARE_STRING (strerrordesc_np (EREMOTEIO), "Remote I/O error"); -+ TEST_COMPARE_STRING (strerrorname_np (EREMOTEIO), "EREMOTEIO"); -+#endif -+#ifdef ENOMEDIUM -+ TEST_COMPARE_STRING (strerrordesc_np (ENOMEDIUM), "No medium found"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOMEDIUM), "ENOMEDIUM"); -+#endif -+#ifdef EMEDIUMTYPE -+ TEST_COMPARE_STRING (strerrordesc_np (EMEDIUMTYPE), "Wrong medium type"); -+ TEST_COMPARE_STRING (strerrorname_np (EMEDIUMTYPE), "EMEDIUMTYPE"); -+#endif -+#ifdef ENOKEY -+ TEST_COMPARE_STRING (strerrordesc_np (ENOKEY), -+ "Required key not available"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOKEY), "ENOKEY"); -+#endif -+#ifdef EKEYEXPIRED -+ TEST_COMPARE_STRING (strerrordesc_np (EKEYEXPIRED), "Key has expired"); -+ TEST_COMPARE_STRING (strerrorname_np (EKEYEXPIRED), "EKEYEXPIRED"); -+#endif -+#ifdef EKEYREVOKED -+ TEST_COMPARE_STRING (strerrordesc_np (EKEYREVOKED), -+ "Key has been revoked"); -+ TEST_COMPARE_STRING (strerrorname_np (EKEYREVOKED), "EKEYREVOKED"); -+#endif -+#ifdef EKEYREJECTED -+ TEST_COMPARE_STRING (strerrordesc_np (EKEYREJECTED), -+ "Key was rejected by service"); -+ TEST_COMPARE_STRING (strerrorname_np (EKEYREJECTED), "EKEYREJECTED"); -+#endif -+#ifdef ERFKILL -+ TEST_COMPARE_STRING (strerrordesc_np (ERFKILL), -+ "Operation not possible due to RF-kill"); -+ TEST_COMPARE_STRING (strerrorname_np (ERFKILL), "ERFKILL"); -+#endif -+#ifdef EHWPOISON -+ TEST_COMPARE_STRING (strerrordesc_np (EHWPOISON), -+ "Memory page has hardware error"); -+ TEST_COMPARE_STRING (strerrorname_np (EHWPOISON), 
"EHWPOISON"); -+#endif -+#ifdef EBADRPC -+ TEST_COMPARE_STRING (strerrordesc_np (EBADRPC), "RPC struct is bad"); -+ TEST_COMPARE_STRING (strerrorname_np (EBADRPC), "EBADRPC"); -+#endif -+#ifdef EFTYPE -+ TEST_COMPARE_STRING (strerrordesc_np (EFTYPE), -+ "Inappropriate file type or format"); -+ TEST_COMPARE_STRING (strerrorname_np (EFTYPE), "EFTYPE"); -+#endif -+#ifdef EPROCUNAVAIL -+ TEST_COMPARE_STRING (strerrordesc_np (EPROCUNAVAIL), -+ "RPC bad procedure for program"); -+ TEST_COMPARE_STRING (strerrorname_np (EPROCUNAVAIL), "EPROCUNAVAIL"); -+#endif -+#ifdef EAUTH -+ TEST_COMPARE_STRING (strerrordesc_np (EAUTH), "Authentication error"); -+ TEST_COMPARE_STRING (strerrorname_np (EAUTH), "EAUTH"); -+#endif -+#ifdef EDIED -+ TEST_COMPARE_STRING (strerrordesc_np (EDIED), "Translator died"); -+ TEST_COMPARE_STRING (strerrorname_np (EDIED), "EDIED"); -+#endif -+#ifdef ERPCMISMATCH -+ TEST_COMPARE_STRING (strerrordesc_np (ERPCMISMATCH), "RPC version wrong"); -+ TEST_COMPARE_STRING (strerrorname_np (ERPCMISMATCH), "ERPCMISMATCH"); -+#endif -+#ifdef EGREGIOUS -+ TEST_COMPARE_STRING (strerrordesc_np (EGREGIOUS), -+ "You really blew it this time"); -+ TEST_COMPARE_STRING (strerrorname_np (EGREGIOUS), "EGREGIOUS"); -+#endif -+#ifdef EPROCLIM -+ TEST_COMPARE_STRING (strerrordesc_np (EPROCLIM), "Too many processes"); -+ TEST_COMPARE_STRING (strerrorname_np (EPROCLIM), "EPROCLIM"); -+#endif -+#ifdef EGRATUITOUS -+ TEST_COMPARE_STRING (strerrordesc_np (EGRATUITOUS), "Gratuitous error"); -+ TEST_COMPARE_STRING (strerrorname_np (EGRATUITOUS), "EGRATUITOUS"); -+#endif -+#if defined (ENOTSUP) && ENOTSUP != EOPNOTSUPP -+ TEST_COMPARE_STRING (strerrordesc_np (ENOTSUP), "Not supported"); -+ TEST_COMPARE_STRING (strerrorname_np (ENOTSUP), "ENOTSUP"); -+#endif -+#ifdef EPROGMISMATCH -+ TEST_COMPARE_STRING (strerrordesc_np (EPROGMISMATCH), -+ "RPC program version wrong"); -+ TEST_COMPARE_STRING (strerrorname_np (EPROGMISMATCH), "EPROGMISMATCH"); -+#endif -+#ifdef EBACKGROUND -+ 
TEST_COMPARE_STRING (strerrordesc_np (EBACKGROUND), -+ "Inappropriate operation for background process"); -+ TEST_COMPARE_STRING (strerrorname_np (EBACKGROUND), "EBACKGROUND"); -+#endif -+#ifdef EIEIO -+ TEST_COMPARE_STRING (strerrordesc_np (EIEIO), "Computer bought the farm"); -+ TEST_COMPARE_STRING (strerrorname_np (EIEIO), "EIEIO"); -+#endif -+#if defined (EWOULDBLOCK) && EWOULDBLOCK != EAGAIN -+ TEST_COMPARE_STRING (strerrordesc_np (EWOULDBLOCK), -+ "Operation would block"); -+ TEST_COMPARE_STRING (strerrorname_np (EWOULDBLOCK), "EWOULDBLOCK"); -+#endif -+#ifdef ENEEDAUTH -+ TEST_COMPARE_STRING (strerrordesc_np (ENEEDAUTH), "Need authenticator"); -+ TEST_COMPARE_STRING (strerrorname_np (ENEEDAUTH), "ENEEDAUTH"); -+#endif -+#ifdef ED -+ TEST_COMPARE_STRING (strerrordesc_np (ED), "?"); -+ TEST_COMPARE_STRING (strerrorname_np (ED), "ED"); -+#endif -+#ifdef EPROGUNAVAIL -+ TEST_COMPARE_STRING (strerrordesc_np (EPROGUNAVAIL), -+ "RPC program not available"); -+ TEST_COMPARE_STRING (strerrorname_np (EPROGUNAVAIL), "EPROGUNAVAIL"); -+#endif - - return 0; - } -diff --git a/stdio-common/vfscanf-internal.c b/stdio-common/vfscanf-internal.c -index 95b46dcbeb..3a323547f9 100644 ---- a/stdio-common/vfscanf-internal.c -+++ b/stdio-common/vfscanf-internal.c -@@ -277,7 +277,7 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, - #endif - { - va_list arg; -- const CHAR_T *f = format; -+ const UCHAR_T *f = (const UCHAR_T *) format; - UCHAR_T fc; /* Current character of the format. */ - WINT_T done = 0; /* Assignments done. */ - size_t read_in = 0; /* Chars read in. */ -@@ -415,10 +415,11 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, - #endif - - #ifndef COMPILE_WSCANF -- if (!isascii ((unsigned char) *f)) -+ if (!isascii (*f)) - { - /* Non-ASCII, may be a multibyte. 
*/ -- int len = __mbrlen (f, strlen (f), &state); -+ int len = __mbrlen ((const char *) f, strlen ((const char *) f), -+ &state); - if (len > 0) - { - do -@@ -426,7 +427,7 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, - c = inchar (); - if (__glibc_unlikely (c == EOF)) - input_error (); -- else if (c != (unsigned char) *f++) -+ else if (c != *f++) - { - ungetc_not_eof (c, s); - conv_error (); -@@ -484,9 +485,9 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, - char_buffer_rewind (&charbuf); - - /* Check for a positional parameter specification. */ -- if (ISDIGIT ((UCHAR_T) *f)) -+ if (ISDIGIT (*f)) - { -- argpos = read_int ((const UCHAR_T **) &f); -+ argpos = read_int (&f); - if (*f == L_('$')) - ++f; - else -@@ -521,8 +522,8 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, - - /* Find the maximum field width. */ - width = 0; -- if (ISDIGIT ((UCHAR_T) *f)) -- width = read_int ((const UCHAR_T **) &f); -+ if (ISDIGIT (*f)) -+ width = read_int (&f); - got_width: - if (width == 0) - width = -1; -@@ -2522,12 +2523,11 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, - } - - while ((fc = *f++) != '\0' && fc != ']') -- if (fc == '-' && *f != '\0' && *f != ']' -- && (unsigned char) f[-2] <= (unsigned char) *f) -+ if (fc == '-' && *f != '\0' && *f != ']' && f[-2] <= *f) - { - /* Add all characters from the one before the '-' - up to (but not including) the next format char. 
*/ -- for (fc = (unsigned char) f[-2]; fc < (unsigned char) *f; ++fc) -+ for (fc = f[-2]; fc < *f; ++fc) - ((char *)charbuf.scratch.data)[fc] = 1; - } - else -diff --git a/stdlib/tst-secure-getenv.c b/stdlib/tst-secure-getenv.c -index 3cfe9a05c3..d4b1139c5e 100644 ---- a/stdlib/tst-secure-getenv.c -+++ b/stdlib/tst-secure-getenv.c -@@ -30,167 +30,12 @@ - #include <sys/wait.h> - #include <unistd.h> - -+#include <support/check.h> - #include <support/support.h> -+#include <support/capture_subprocess.h> - #include <support/test-driver.h> - - static char MAGIC_ARGUMENT[] = "run-actual-test"; --#define MAGIC_STATUS 19 -- --/* Return a GID which is not our current GID, but is present in the -- supplementary group list. */ --static gid_t --choose_gid (void) --{ -- int count = getgroups (0, NULL); -- if (count < 0) -- { -- printf ("getgroups: %m\n"); -- exit (1); -- } -- gid_t *groups; -- groups = xcalloc (count, sizeof (*groups)); -- int ret = getgroups (count, groups); -- if (ret < 0) -- { -- printf ("getgroups: %m\n"); -- exit (1); -- } -- gid_t current = getgid (); -- gid_t not_current = 0; -- for (int i = 0; i < ret; ++i) -- { -- if (groups[i] != current) -- { -- not_current = groups[i]; -- break; -- } -- } -- free (groups); -- return not_current; --} -- -- --/* Copies the executable into a restricted directory, so that we can -- safely make it SGID with the TARGET group ID. Then runs the -- executable. 
*/ --static int --run_executable_sgid (gid_t target) --{ -- char *dirname = xasprintf ("%s/secure-getenv.%jd", -- test_dir, (intmax_t) getpid ()); -- char *execname = xasprintf ("%s/bin", dirname); -- int infd = -1; -- int outfd = -1; -- int ret = -1; -- if (mkdir (dirname, 0700) < 0) -- { -- printf ("mkdir: %m\n"); -- goto err; -- } -- infd = open ("/proc/self/exe", O_RDONLY); -- if (infd < 0) -- { -- printf ("open (/proc/self/exe): %m\n"); -- goto err; -- } -- outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700); -- if (outfd < 0) -- { -- printf ("open (%s): %m\n", execname); -- goto err; -- } -- char buf[4096]; -- for (;;) -- { -- ssize_t rdcount = read (infd, buf, sizeof (buf)); -- if (rdcount < 0) -- { -- printf ("read: %m\n"); -- goto err; -- } -- if (rdcount == 0) -- break; -- char *p = buf; -- char *end = buf + rdcount; -- while (p != end) -- { -- ssize_t wrcount = write (outfd, buf, end - p); -- if (wrcount == 0) -- errno = ENOSPC; -- if (wrcount <= 0) -- { -- printf ("write: %m\n"); -- goto err; -- } -- p += wrcount; -- } -- } -- if (fchown (outfd, getuid (), target) < 0) -- { -- printf ("fchown (%s): %m\n", execname); -- goto err; -- } -- if (fchmod (outfd, 02750) < 0) -- { -- printf ("fchmod (%s): %m\n", execname); -- goto err; -- } -- if (close (outfd) < 0) -- { -- printf ("close (outfd): %m\n"); -- goto err; -- } -- if (close (infd) < 0) -- { -- printf ("close (infd): %m\n"); -- goto err; -- } -- -- int kid = fork (); -- if (kid < 0) -- { -- printf ("fork: %m\n"); -- goto err; -- } -- if (kid == 0) -- { -- /* Child process. 
*/ -- char *args[] = { execname, MAGIC_ARGUMENT, NULL }; -- execve (execname, args, environ); -- printf ("execve (%s): %m\n", execname); -- _exit (1); -- } -- int status; -- if (waitpid (kid, &status, 0) < 0) -- { -- printf ("waitpid: %m\n"); -- goto err; -- } -- if (!WIFEXITED (status) || WEXITSTATUS (status) != MAGIC_STATUS) -- { -- printf ("Unexpected exit status %d from child process\n", -- status); -- goto err; -- } -- ret = 0; -- --err: -- if (outfd >= 0) -- close (outfd); -- if (infd >= 0) -- close (infd); -- if (execname) -- { -- unlink (execname); -- free (execname); -- } -- if (dirname) -- { -- rmdir (dirname); -- free (dirname); -- } -- return ret; --} - - static int - do_test (void) -@@ -212,15 +57,15 @@ do_test (void) - exit (1); - } - -- gid_t target = choose_gid (); -- if (target == 0) -- { -- fprintf (stderr, -- "Could not find a suitable GID for user %jd, skipping test\n", -- (intmax_t) getuid ()); -- exit (0); -- } -- return run_executable_sgid (target); -+ int status = support_capture_subprogram_self_sgid (MAGIC_ARGUMENT); -+ -+ if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) -+ return EXIT_UNSUPPORTED; -+ -+ if (!WIFEXITED (status)) -+ FAIL_EXIT1 ("Unexpected exit status %d from child process\n", status); -+ -+ return 0; - } - - static void -@@ -229,23 +74,15 @@ alternative_main (int argc, char **argv) - if (argc == 2 && strcmp (argv[1], MAGIC_ARGUMENT) == 0) - { - if (getgid () == getegid ()) -- { -- /* This can happen if the file system is mounted nosuid. */ -- fprintf (stderr, "SGID failed: GID and EGID match (%jd)\n", -- (intmax_t) getgid ()); -- exit (MAGIC_STATUS); -- } -+ /* This can happen if the file system is mounted nosuid. 
*/ -+ FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n", -+ (intmax_t) getgid ()); - if (getenv ("PATH") == NULL) -- { -- printf ("PATH variable not present\n"); -- exit (3); -- } -+ FAIL_EXIT (3, "PATH variable not present\n"); - if (secure_getenv ("PATH") != NULL) -- { -- printf ("PATH variable not filtered out\n"); -- exit (4); -- } -- exit (MAGIC_STATUS); -+ FAIL_EXIT (4, "PATH variable not filtered out\n"); -+ -+ exit (EXIT_SUCCESS); - } - } - -diff --git a/string/bits/string_fortified.h b/string/bits/string_fortified.h -index 309d0f39b2..c8d3051af8 100644 ---- a/string/bits/string_fortified.h -+++ b/string/bits/string_fortified.h -@@ -22,11 +22,6 @@ - # error "Never use <bits/string_fortified.h> directly; include <string.h> instead." - #endif - --#if !__GNUC_PREREQ (5,0) --__warndecl (__warn_memset_zero_len, -- "memset used with constant zero length parameter; this could be due to transposed parameters"); --#endif -- - __fortify_function void * - __NTH (memcpy (void *__restrict __dest, const void *__restrict __src, - size_t __len)) -@@ -58,16 +53,6 @@ __NTH (mempcpy (void *__restrict __dest, const void *__restrict __src, - __fortify_function void * - __NTH (memset (void *__dest, int __ch, size_t __len)) - { -- /* GCC-5.0 and newer implements these checks in the compiler, so we don't -- need them here. 
*/ --#if !__GNUC_PREREQ (5,0) -- if (__builtin_constant_p (__len) && __len == 0 -- && (!__builtin_constant_p (__ch) || __ch != 0)) -- { -- __warn_memset_zero_len (); -- return __dest; -- } --#endif - return __builtin___memset_chk (__dest, __ch, __len, __bos0 (__dest)); - } - -diff --git a/support/Makefile b/support/Makefile -index 93faafddf9..3d3aff5ff9 100644 ---- a/support/Makefile -+++ b/support/Makefile -@@ -35,6 +35,8 @@ libsupport-routines = \ - ignore_stderr \ - next_to_fault \ - oom_error \ -+ resolv_response_context_duplicate \ -+ resolv_response_context_free \ - resolv_test \ - set_fortify_handler \ - support-xfstat \ -@@ -133,6 +135,7 @@ libsupport-routines = \ - xpthread_join \ - xpthread_key_create \ - xpthread_key_delete \ -+ xpthread_kill \ - xpthread_mutex_consistent \ - xpthread_mutex_destroy \ - xpthread_mutex_init \ -diff --git a/support/capture_subprocess.h b/support/capture_subprocess.h -index 9808750f80..421f657678 100644 ---- a/support/capture_subprocess.h -+++ b/support/capture_subprocess.h -@@ -41,6 +41,12 @@ struct support_capture_subprocess support_capture_subprocess - struct support_capture_subprocess support_capture_subprogram - (const char *file, char *const argv[]); - -+/* Copy the running program into a setgid binary and run it with CHILD_ID -+ argument. If execution is successful, return the exit status of the child -+ program, otherwise return a non-zero failure exit code. */ -+int support_capture_subprogram_self_sgid -+ (char *child_id); -+ - /* Deallocate the subprocess data captured by - support_capture_subprocess. */ - void support_capture_subprocess_free (struct support_capture_subprocess *); -diff --git a/support/resolv_response_context_duplicate.c b/support/resolv_response_context_duplicate.c -new file mode 100644 -index 0000000000..f9c5c3462a ---- /dev/null -+++ b/support/resolv_response_context_duplicate.c -@@ -0,0 +1,37 @@ -+/* Duplicate a response context used in DNS resolver tests. 
-+ Copyright (C) 2020 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ <https://www.gnu.org/licenses/>. */ -+ -+#include <string.h> -+#include <support/resolv_test.h> -+#include <support/support.h> -+ -+struct resolv_response_context * -+resolv_response_context_duplicate (const struct resolv_response_context *ctx) -+{ -+ struct resolv_response_context *result = xmalloc (sizeof (*result)); -+ memcpy (result, ctx, sizeof (*result)); -+ if (result->client_address != NULL) -+ { -+ result->client_address = xmalloc (result->client_address_length); -+ memcpy (result->client_address, ctx->client_address, -+ result->client_address_length); -+ } -+ result->query_buffer = xmalloc (result->query_length); -+ memcpy (result->query_buffer, ctx->query_buffer, result->query_length); -+ return result; -+} -diff --git a/support/resolv_response_context_free.c b/support/resolv_response_context_free.c -new file mode 100644 -index 0000000000..b88c05ffd4 ---- /dev/null -+++ b/support/resolv_response_context_free.c -@@ -0,0 +1,28 @@ -+/* Free a response context used in DNS resolver tests. -+ Copyright (C) 2020 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. 
-+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ <https://www.gnu.org/licenses/>. */ -+ -+#include <stdlib.h> -+#include <support/resolv_test.h> -+ -+void -+resolv_response_context_free (struct resolv_response_context *ctx) -+{ -+ free (ctx->query_buffer); -+ free (ctx->client_address); -+ free (ctx); -+} -diff --git a/support/resolv_test.c b/support/resolv_test.c -index 53b7fc41ab..9878a040a3 100644 ---- a/support/resolv_test.c -+++ b/support/resolv_test.c -@@ -181,7 +181,9 @@ resolv_response_init (struct resolv_response_builder *b, - b->buffer[2] |= b->query_buffer[2] & 0x01; /* Copy the RD bit. */ - if (flags.tc) - b->buffer[2] |= 0x02; -- b->buffer[3] = 0x80 | flags.rcode; /* Always set RA. 
*/ -+ b->buffer[3] = flags.rcode; -+ if (!flags.clear_ra) -+ b->buffer[3] |= 0x80; - if (flags.ad) - b->buffer[3] |= 0x20; - -@@ -434,9 +436,9 @@ resolv_response_buffer (const struct resolv_response_builder *b) - return result; - } - --static struct resolv_response_builder * --response_builder_allocate -- (const unsigned char *query_buffer, size_t query_length) -+struct resolv_response_builder * -+resolv_response_builder_allocate (const unsigned char *query_buffer, -+ size_t query_length) - { - struct resolv_response_builder *b = xmalloc (sizeof (*b)); - memset (b, 0, offsetof (struct resolv_response_builder, buffer)); -@@ -445,8 +447,8 @@ response_builder_allocate - return b; - } - --static void --response_builder_free (struct resolv_response_builder *b) -+void -+resolv_response_builder_free (struct resolv_response_builder *b) - { - tdestroy (b->compression_offsets, free); - free (b); -@@ -661,13 +663,17 @@ server_thread_udp_process_one (struct resolv_test *obj, int server_index) - - struct resolv_response_context ctx = - { -+ .test = obj, -+ .client_address = &peer, -+ .client_address_length = peerlen, - .query_buffer = query, - .query_length = length, - .server_index = server_index, - .tcp = false, - .edns = qinfo.edns, - }; -- struct resolv_response_builder *b = response_builder_allocate (query, length); -+ struct resolv_response_builder *b -+ = resolv_response_builder_allocate (query, length); - obj->config.response_callback - (&ctx, b, qinfo.qname, qinfo.qclass, qinfo.qtype); - -@@ -684,7 +690,7 @@ server_thread_udp_process_one (struct resolv_test *obj, int server_index) - if (b->offset >= 12) - printf ("info: UDP server %d: sending response:" - " %zu bytes, RCODE %d (for %s/%u/%u)\n", -- server_index, b->offset, b->buffer[3] & 0x0f, -+ ctx.server_index, b->offset, b->buffer[3] & 0x0f, - qinfo.qname, qinfo.qclass, qinfo.qtype); - else - printf ("info: UDP server %d: sending response: %zu bytes" -@@ -694,23 +700,31 @@ server_thread_udp_process_one (struct 
resolv_test *obj, int server_index) - if (b->truncate_bytes > 0) - printf ("info: truncated by %u bytes\n", b->truncate_bytes); - } -- size_t to_send = b->offset; -- if (to_send < b->truncate_bytes) -- to_send = 0; -- else -- to_send -= b->truncate_bytes; -- -- /* Ignore most errors here because the other end may have closed -- the socket. */ -- if (sendto (obj->servers[server_index].socket_udp, -- b->buffer, to_send, 0, -- (struct sockaddr *) &peer, peerlen) < 0) -- TEST_VERIFY_EXIT (errno != EBADF); -+ resolv_response_send_udp (&ctx, b); - } -- response_builder_free (b); -+ resolv_response_builder_free (b); - return true; - } - -+void -+resolv_response_send_udp (const struct resolv_response_context *ctx, -+ struct resolv_response_builder *b) -+{ -+ TEST_VERIFY_EXIT (!ctx->tcp); -+ size_t to_send = b->offset; -+ if (to_send < b->truncate_bytes) -+ to_send = 0; -+ else -+ to_send -= b->truncate_bytes; -+ -+ /* Ignore most errors here because the other end may have closed -+ the socket. */ -+ if (sendto (ctx->test->servers[ctx->server_index].socket_udp, -+ b->buffer, to_send, 0, -+ ctx->client_address, ctx->client_address_length) < 0) -+ TEST_VERIFY_EXIT (errno != EBADF); -+} -+ - /* UDP thread_callback function. Variant for one thread per - server. 
*/ - static void -@@ -897,14 +911,15 @@ server_thread_tcp_client (void *arg) - - struct resolv_response_context ctx = - { -+ .test = closure->obj, - .query_buffer = query_buffer, - .query_length = query_length, - .server_index = closure->server_index, - .tcp = true, - .edns = qinfo.edns, - }; -- struct resolv_response_builder *b = response_builder_allocate -- (query_buffer, query_length); -+ struct resolv_response_builder *b -+ = resolv_response_builder_allocate (query_buffer, query_length); - closure->obj->config.response_callback - (&ctx, b, qinfo.qname, qinfo.qclass, qinfo.qtype); - -@@ -936,7 +951,7 @@ server_thread_tcp_client (void *arg) - writev_fully (closure->client_socket, buffers, 2); - } - bool close_flag = b->close; -- response_builder_free (b); -+ resolv_response_builder_free (b); - free (query_buffer); - if (close_flag) - break; -diff --git a/support/resolv_test.h b/support/resolv_test.h -index 67819469a0..31a5c1c3e7 100644 ---- a/support/resolv_test.h -+++ b/support/resolv_test.h -@@ -35,25 +35,36 @@ struct resolv_edns_info - uint16_t payload_size; - }; - -+/* This opaque struct collects information about the resolver testing -+ currently in progress. */ -+struct resolv_test; -+ - /* This struct provides context information when the response callback - specified in struct resolv_redirect_config is invoked. */ - struct resolv_response_context - { -- const unsigned char *query_buffer; -+ struct resolv_test *test; -+ void *client_address; -+ size_t client_address_length; -+ unsigned char *query_buffer; - size_t query_length; - int server_index; - bool tcp; - struct resolv_edns_info edns; - }; - -+/* Produces a deep copy of the context. */ -+struct resolv_response_context * -+ resolv_response_context_duplicate (const struct resolv_response_context *); -+ -+/* Frees the copy. For the context passed to the response function, -+ this happens implicitly. 
*/ -+void resolv_response_context_free (struct resolv_response_context *); -+ - /* This opaque struct is used to construct responses from within the - response callback function. */ - struct resolv_response_builder; - --/* This opaque struct collects information about the resolver testing -- currently in progress. */ --struct resolv_test; -- - enum - { - /* Maximum number of test servers supported by the framework. */ -@@ -137,6 +148,10 @@ struct resolv_response_flags - /* If true, the AD (authenticated data) flag will be set. */ - bool ad; - -+ /* If true, do not set the RA (recursion available) flag in the -+ response. */ -+ bool clear_ra; -+ - /* Initial section count values. Can be used to artificially - increase the counts, for malformed packet testing.*/ - unsigned short qdcount; -@@ -188,6 +203,22 @@ void resolv_response_close (struct resolv_response_builder *); - /* The size of the response packet built so far. */ - size_t resolv_response_length (const struct resolv_response_builder *); - -+/* Allocates a response builder tied to a specific query packet, -+ starting at QUERY_BUFFER, containing QUERY_LENGTH bytes. */ -+struct resolv_response_builder * -+ resolv_response_builder_allocate (const unsigned char *query_buffer, -+ size_t query_length); -+ -+/* Deallocates a response buffer. */ -+void resolv_response_builder_free (struct resolv_response_builder *); -+ -+/* Sends a UDP response using a specific context. This can be used to -+ reorder or duplicate responses, along with -+ resolv_response_context_duplicate and -+ response_builder_allocate. 
*/ -+void resolv_response_send_udp (const struct resolv_response_context *, -+ struct resolv_response_builder *); -+ - __END_DECLS - - #endif /* SUPPORT_RESOLV_TEST_H */ -diff --git a/support/subprocess.h b/support/subprocess.h -index 8b442fd5c0..34ffd02e8e 100644 ---- a/support/subprocess.h -+++ b/support/subprocess.h -@@ -38,6 +38,11 @@ struct support_subprocess support_subprocess - struct support_subprocess support_subprogram - (const char *file, char *const argv[]); - -+/* Invoke program FILE with ARGV arguments by using posix_spawn and wait for it -+ to complete. Return program exit status. */ -+int support_subprogram_wait -+ (const char *file, char *const argv[]); -+ - /* Wait for the subprocess indicated by PROC::PID. Return the status - indicate by waitpid call. */ - int support_process_wait (struct support_subprocess *proc); -diff --git a/support/support_capture_subprocess.c b/support/support_capture_subprocess.c -index eeed676e3d..28a37df67f 100644 ---- a/support/support_capture_subprocess.c -+++ b/support/support_capture_subprocess.c -@@ -20,11 +20,14 @@ - #include <support/capture_subprocess.h> - - #include <errno.h> -+#include <fcntl.h> - #include <stdlib.h> - #include <support/check.h> - #include <support/xunistd.h> - #include <support/xsocket.h> - #include <support/xspawn.h> -+#include <support/support.h> -+#include <support/test-driver.h> - - static void - transfer (const char *what, struct pollfd *pfd, struct xmemstream *stream) -@@ -36,7 +39,7 @@ transfer (const char *what, struct pollfd *pfd, struct xmemstream *stream) - if (ret < 0) - { - support_record_failure (); -- printf ("error: reading from subprocess %s: %m", what); -+ printf ("error: reading from subprocess %s: %m\n", what); - pfd->events = 0; - pfd->revents = 0; - } -@@ -102,6 +105,129 @@ support_capture_subprogram (const char *file, char *const argv[]) - return result; - } - -+/* Copies the executable into a restricted directory, so that we can -+ safely make it SGID with the TARGET 
group ID. Then runs the -+ executable. */ -+static int -+copy_and_spawn_sgid (char *child_id, gid_t gid) -+{ -+ char *dirname = xasprintf ("%s/tst-tunables-setuid.%jd", -+ test_dir, (intmax_t) getpid ()); -+ char *execname = xasprintf ("%s/bin", dirname); -+ int infd = -1; -+ int outfd = -1; -+ int ret = 1, status = 1; -+ -+ TEST_VERIFY (mkdir (dirname, 0700) == 0); -+ if (support_record_failure_is_failed ()) -+ goto err; -+ -+ infd = open ("/proc/self/exe", O_RDONLY); -+ if (infd < 0) -+ FAIL_UNSUPPORTED ("unsupported: Cannot read binary from procfs\n"); -+ -+ outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700); -+ TEST_VERIFY (outfd >= 0); -+ if (support_record_failure_is_failed ()) -+ goto err; -+ -+ char buf[4096]; -+ for (;;) -+ { -+ ssize_t rdcount = read (infd, buf, sizeof (buf)); -+ TEST_VERIFY (rdcount >= 0); -+ if (support_record_failure_is_failed ()) -+ goto err; -+ if (rdcount == 0) -+ break; -+ char *p = buf; -+ char *end = buf + rdcount; -+ while (p != end) -+ { -+ ssize_t wrcount = write (outfd, buf, end - p); -+ if (wrcount == 0) -+ errno = ENOSPC; -+ TEST_VERIFY (wrcount > 0); -+ if (support_record_failure_is_failed ()) -+ goto err; -+ p += wrcount; -+ } -+ } -+ TEST_VERIFY (fchown (outfd, getuid (), gid) == 0); -+ if (support_record_failure_is_failed ()) -+ goto err; -+ TEST_VERIFY (fchmod (outfd, 02750) == 0); -+ if (support_record_failure_is_failed ()) -+ goto err; -+ TEST_VERIFY (close (outfd) == 0); -+ if (support_record_failure_is_failed ()) -+ goto err; -+ TEST_VERIFY (close (infd) == 0); -+ if (support_record_failure_is_failed ()) -+ goto err; -+ -+ /* We have the binary, now spawn the subprocess. Avoid using -+ support_subprogram because we only want the program exit status, not the -+ contents. 
*/ -+ ret = 0; -+ -+ char * const args[] = {execname, child_id, NULL}; -+ -+ status = support_subprogram_wait (args[0], args); -+ -+err: -+ if (outfd >= 0) -+ close (outfd); -+ if (infd >= 0) -+ close (infd); -+ if (execname != NULL) -+ { -+ unlink (execname); -+ free (execname); -+ } -+ if (dirname != NULL) -+ { -+ rmdir (dirname); -+ free (dirname); -+ } -+ -+ if (ret != 0) -+ FAIL_EXIT1("Failed to make sgid executable for test\n"); -+ -+ return status; -+} -+ -+int -+support_capture_subprogram_self_sgid (char *child_id) -+{ -+ gid_t target = 0; -+ const int count = 64; -+ gid_t groups[count]; -+ -+ /* Get a GID which is not our current GID, but is present in the -+ supplementary group list. */ -+ int ret = getgroups (count, groups); -+ if (ret < 0) -+ FAIL_UNSUPPORTED("Could not get group list for user %jd\n", -+ (intmax_t) getuid ()); -+ -+ gid_t current = getgid (); -+ for (int i = 0; i < ret; ++i) -+ { -+ if (groups[i] != current) -+ { -+ target = groups[i]; -+ break; -+ } -+ } -+ -+ if (target == 0) -+ FAIL_UNSUPPORTED("Could not find a suitable GID for user %jd\n", -+ (intmax_t) getuid ()); -+ -+ return copy_and_spawn_sgid (child_id, target); -+} -+ - void - support_capture_subprocess_free (struct support_capture_subprocess *p) - { -diff --git a/support/support_subprocess.c b/support/support_subprocess.c -index 36e3a77af2..4a25828111 100644 ---- a/support/support_subprocess.c -+++ b/support/support_subprocess.c -@@ -27,7 +27,7 @@ - #include <support/subprocess.h> - - static struct support_subprocess --support_suprocess_init (void) -+support_subprocess_init (void) - { - struct support_subprocess result; - -@@ -48,7 +48,7 @@ support_suprocess_init (void) - struct support_subprocess - support_subprocess (void (*callback) (void *), void *closure) - { -- struct support_subprocess result = support_suprocess_init (); -+ struct support_subprocess result = support_subprocess_init (); - - result.pid = xfork (); - if (result.pid == 0) -@@ -71,7 +71,7 @@ 
support_subprocess (void (*callback) (void *), void *closure) - struct support_subprocess - support_subprogram (const char *file, char *const argv[]) - { -- struct support_subprocess result = support_suprocess_init (); -+ struct support_subprocess result = support_subprocess_init (); - - posix_spawn_file_actions_t fa; - /* posix_spawn_file_actions_init does not fail. */ -@@ -84,7 +84,7 @@ support_subprogram (const char *file, char *const argv[]) - xposix_spawn_file_actions_addclose (&fa, result.stdout_pipe[1]); - xposix_spawn_file_actions_addclose (&fa, result.stderr_pipe[1]); - -- result.pid = xposix_spawn (file, &fa, NULL, argv, NULL); -+ result.pid = xposix_spawn (file, &fa, NULL, argv, environ); - - xclose (result.stdout_pipe[1]); - xclose (result.stderr_pipe[1]); -@@ -92,6 +92,19 @@ support_subprogram (const char *file, char *const argv[]) - return result; - } - -+int -+support_subprogram_wait (const char *file, char *const argv[]) -+{ -+ posix_spawn_file_actions_t fa; -+ -+ posix_spawn_file_actions_init (&fa); -+ struct support_subprocess res = support_subprocess_init (); -+ -+ res.pid = xposix_spawn (file, &fa, NULL, argv, environ); -+ -+ return support_process_wait (&res); -+} -+ - int - support_process_wait (struct support_subprocess *proc) - { -diff --git a/support/xpthread_kill.c b/support/xpthread_kill.c -new file mode 100644 -index 0000000000..111a75d85e ---- /dev/null -+++ b/support/xpthread_kill.c -@@ -0,0 +1,26 @@ -+/* pthread_kill with error checking. -+ Copyright (C) 2021 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. 
-+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ <https://www.gnu.org/licenses/>. */ -+ -+#include <signal.h> -+#include <support/xthread.h> -+ -+void -+xpthread_kill (pthread_t thr, int signo) -+{ -+ xpthread_check_return ("pthread_kill", pthread_kill (thr, signo)); -+} -diff --git a/support/xthread.h b/support/xthread.h -index 05f8d4a7d9..cb1fc30da0 100644 ---- a/support/xthread.h -+++ b/support/xthread.h -@@ -75,6 +75,8 @@ void xpthread_attr_setstacksize (pthread_attr_t *attr, - void xpthread_attr_setguardsize (pthread_attr_t *attr, - size_t guardsize); - -+void xpthread_kill (pthread_t thr, int signo); -+ - /* Set the stack size in ATTR to a small value, but still large enough - to cover most internal glibc stack usage. */ - void support_set_small_thread_stack_size (pthread_attr_t *attr); -diff --git a/sysdeps/aarch64/dl-bti.c b/sysdeps/aarch64/dl-bti.c -index 196e462520..cf7624aaa2 100644 ---- a/sysdeps/aarch64/dl-bti.c -+++ b/sysdeps/aarch64/dl-bti.c -@@ -19,43 +19,76 @@ - #include <errno.h> - #include <libintl.h> - #include <ldsodefs.h> -+#include <sys/mman.h> - --static int --enable_bti (struct link_map *map, const char *program) -+/* See elf/dl-load.h. */ -+#ifndef MAP_COPY -+# define MAP_COPY (MAP_PRIVATE | MAP_DENYWRITE) -+#endif -+ -+/* Enable BTI protection for MAP. 
*/ -+ -+void -+_dl_bti_protect (struct link_map *map, int fd) - { -+ const size_t pagesz = GLRO(dl_pagesize); - const ElfW(Phdr) *phdr; -- unsigned prot; - - for (phdr = map->l_phdr; phdr < &map->l_phdr[map->l_phnum]; ++phdr) - if (phdr->p_type == PT_LOAD && (phdr->p_flags & PF_X)) - { -- void *start = (void *) (phdr->p_vaddr + map->l_addr); -- size_t len = phdr->p_memsz; -+ size_t vstart = ALIGN_DOWN (phdr->p_vaddr, pagesz); -+ size_t vend = ALIGN_UP (phdr->p_vaddr + phdr->p_filesz, pagesz); -+ off_t off = ALIGN_DOWN (phdr->p_offset, pagesz); -+ void *start = (void *) (vstart + map->l_addr); -+ size_t len = vend - vstart; - -- prot = PROT_EXEC | PROT_BTI; -+ unsigned prot = PROT_EXEC | PROT_BTI; - if (phdr->p_flags & PF_R) - prot |= PROT_READ; - if (phdr->p_flags & PF_W) - prot |= PROT_WRITE; - -- if (__mprotect (start, len, prot) < 0) -- { -- if (program) -- _dl_fatal_printf ("%s: mprotect failed to turn on BTI\n", -- map->l_name); -- else -- _dl_signal_error (errno, map->l_name, "dlopen", -- N_("mprotect failed to turn on BTI")); -- } -+ if (fd == -1) -+ /* Ignore failures for kernel mapped binaries. */ -+ __mprotect (start, len, prot); -+ else -+ map->l_mach.bti_fail = __mmap (start, len, prot, -+ MAP_FIXED|MAP_COPY|MAP_FILE, -+ fd, off) == MAP_FAILED; - } -- return 0; - } - --/* Enable BTI for L if required. */ -+ -+static void -+bti_failed (struct link_map *l, const char *program) -+{ -+ if (program) -+ _dl_fatal_printf ("%s: %s: failed to turn on BTI protection\n", -+ program, l->l_name); -+ else -+ /* Note: the errno value is not available any more. */ -+ _dl_signal_error (0, l->l_name, "dlopen", -+ N_("failed to turn on BTI protection")); -+} -+ -+ -+/* Enable BTI for L and its dependencies. 
*/ - - void - _dl_bti_check (struct link_map *l, const char *program) - { -- if (GLRO(dl_aarch64_cpu_features).bti && l->l_mach.bti) -- enable_bti (l, program); -+ if (!GLRO(dl_aarch64_cpu_features).bti) -+ return; -+ -+ if (l->l_mach.bti_fail) -+ bti_failed (l, program); -+ -+ unsigned int i = l->l_searchlist.r_nlist; -+ while (i-- > 0) -+ { -+ struct link_map *dep = l->l_initfini[i]; -+ if (dep->l_mach.bti_fail) -+ bti_failed (dep, program); -+ } - } -diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h -index 70b9ed3925..fde7cfd9e2 100644 ---- a/sysdeps/aarch64/dl-machine.h -+++ b/sysdeps/aarch64/dl-machine.h -@@ -395,13 +395,6 @@ elf_machine_lazy_rel (struct link_map *map, - /* Check for unexpected PLT reloc type. */ - if (__builtin_expect (r_type == AARCH64_R(JUMP_SLOT), 1)) - { -- if (map->l_mach.plt == 0) -- { -- /* Prelinking. */ -- *reloc_addr += l_addr; -- return; -- } -- - if (__glibc_unlikely (map->l_info[DT_AARCH64 (VARIANT_PCS)] != NULL)) - { - /* Check the symbol table for variant PCS symbols. 
*/ -@@ -425,7 +418,10 @@ elf_machine_lazy_rel (struct link_map *map, - } - } - -- *reloc_addr = map->l_mach.plt; -+ if (map->l_mach.plt == 0) -+ *reloc_addr += l_addr; -+ else -+ *reloc_addr = map->l_mach.plt; - } - else if (__builtin_expect (r_type == AARCH64_R(TLSDESC), 1)) - { -diff --git a/sysdeps/aarch64/dl-prop.h b/sysdeps/aarch64/dl-prop.h -index b0785bda83..e926e54984 100644 ---- a/sysdeps/aarch64/dl-prop.h -+++ b/sysdeps/aarch64/dl-prop.h -@@ -19,6 +19,8 @@ - #ifndef _DL_PROP_H - #define _DL_PROP_H - -+extern void _dl_bti_protect (struct link_map *, int) attribute_hidden; -+ - extern void _dl_bti_check (struct link_map *, const char *) - attribute_hidden; - -@@ -35,14 +37,18 @@ _dl_open_check (struct link_map *m) - } - - static inline void __attribute__ ((always_inline)) --_dl_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph) -+_dl_process_pt_note (struct link_map *l, int fd, const ElfW(Phdr) *ph) - { - } - - static inline int --_dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz, -- void *data) -+_dl_process_gnu_property (struct link_map *l, int fd, uint32_t type, -+ uint32_t datasz, void *data) - { -+ if (!GLRO(dl_aarch64_cpu_features).bti) -+ /* Skip note processing. */ -+ return 0; -+ - if (type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) - { - /* Stop if the property note is ill-formed. */ -@@ -51,7 +57,7 @@ _dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz, - - unsigned int feature_1 = *(unsigned int *) data; - if (feature_1 & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) -- l->l_mach.bti = true; -+ _dl_bti_protect (l, fd); - - /* Stop if we processed the property note. */ - return 0; -diff --git a/sysdeps/aarch64/linkmap.h b/sysdeps/aarch64/linkmap.h -index 847a03ace2..b3f7663b07 100644 ---- a/sysdeps/aarch64/linkmap.h -+++ b/sysdeps/aarch64/linkmap.h -@@ -22,5 +22,5 @@ struct link_map_machine - { - ElfW(Addr) plt; /* Address of .plt */ - void *tlsdesc_table; /* Address of TLS descriptor hash table. 
*/ -- bool bti; /* Branch Target Identification is enabled. */ -+ bool bti_fail; /* Failed to enable Branch Target Identification. */ - }; -diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c -index 7cf5f033e8..799d60c98c 100644 ---- a/sysdeps/aarch64/multiarch/memcpy.c -+++ b/sysdeps/aarch64/multiarch/memcpy.c -@@ -41,7 +41,8 @@ libc_ifunc (__libc_memcpy, - ? __memcpy_falkor - : (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr) - ? __memcpy_thunderx2 -- : (IS_NEOVERSE_N1 (midr) -+ : (IS_NEOVERSE_N1 (midr) || IS_NEOVERSE_N2 (midr) -+ || IS_NEOVERSE_V1 (midr) - ? __memcpy_simd - : __memcpy_generic))))); - -diff --git a/sysdeps/aarch64/multiarch/memcpy_advsimd.S b/sysdeps/aarch64/multiarch/memcpy_advsimd.S -index d4ba747777..48bb6d7ca4 100644 ---- a/sysdeps/aarch64/multiarch/memcpy_advsimd.S -+++ b/sysdeps/aarch64/multiarch/memcpy_advsimd.S -@@ -223,12 +223,13 @@ L(copy_long_backwards): - b.ls L(copy64_from_start) - - L(loop64_backwards): -- stp A_q, B_q, [dstend, -32] -+ str B_q, [dstend, -16] -+ str A_q, [dstend, -32] - ldp A_q, B_q, [srcend, -96] -- stp C_q, D_q, [dstend, -64] -+ str D_q, [dstend, -48] -+ str C_q, [dstend, -64]! - ldp C_q, D_q, [srcend, -128] - sub srcend, srcend, 64 -- sub dstend, dstend, 64 - subs count, count, 64 - b.hi L(loop64_backwards) - -diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c -index ad10aa8ac6..46a4cb3a54 100644 ---- a/sysdeps/aarch64/multiarch/memmove.c -+++ b/sysdeps/aarch64/multiarch/memmove.c -@@ -41,7 +41,8 @@ libc_ifunc (__libc_memmove, - ? __memmove_falkor - : (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr) - ? __memmove_thunderx2 -- : (IS_NEOVERSE_N1 (midr) -+ : (IS_NEOVERSE_N1 (midr) || IS_NEOVERSE_N2 (midr) -+ || IS_NEOVERSE_V1 (midr) - ? 
__memmove_simd - : __memmove_generic))))); - -diff --git a/sysdeps/aarch64/start.S b/sysdeps/aarch64/start.S -index 75393e1c18..1998ea95d4 100644 ---- a/sysdeps/aarch64/start.S -+++ b/sysdeps/aarch64/start.S -@@ -43,11 +43,9 @@ - */ - - .text -- .globl _start -- .type _start,#function --_start: -- BTI_C -+ENTRY(_start) - /* Create an initial frame with 0 LR and FP */ -+ cfi_undefined (x30) - mov x29, #0 - mov x30, #0 - -@@ -101,8 +99,10 @@ _start: - because crt1.o and rcrt1.o share code and the later must avoid the - use of GOT relocations before __libc_start_main is called. */ - __wrap_main: -+ BTI_C - b main - #endif -+END(_start) - - /* Define a symbol for the first piece of initialized data. */ - .data -diff --git a/sysdeps/generic/dl-prop.h b/sysdeps/generic/dl-prop.h -index f1cf576fe3..df27ff8e6a 100644 ---- a/sysdeps/generic/dl-prop.h -+++ b/sysdeps/generic/dl-prop.h -@@ -37,15 +37,15 @@ _dl_open_check (struct link_map *m) - } - - static inline void __attribute__ ((always_inline)) --_dl_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph) -+_dl_process_pt_note (struct link_map *l, int fd, const ElfW(Phdr) *ph) - { - } - - /* Called for each property in the NT_GNU_PROPERTY_TYPE_0 note of L, - processing of the properties continues until this returns 0. */ - static inline int __attribute__ ((always_inline)) --_dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz, -- void *data) -+_dl_process_gnu_property (struct link_map *l, int fd, uint32_t type, -+ uint32_t datasz, void *data) - { - return 0; - } -diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h -index ba114ab4b1..62ac40d81b 100644 ---- a/sysdeps/generic/ldsodefs.h -+++ b/sysdeps/generic/ldsodefs.h -@@ -919,8 +919,9 @@ extern void _dl_rtld_di_serinfo (struct link_map *loader, - Dl_serinfo *si, bool counting); - - /* Process PT_GNU_PROPERTY program header PH in module L after -- PT_LOAD segments are mapped. 
*/ --void _dl_process_pt_gnu_property (struct link_map *l, const ElfW(Phdr) *ph); -+ PT_LOAD segments are mapped from file FD. */ -+void _dl_process_pt_gnu_property (struct link_map *l, int fd, -+ const ElfW(Phdr) *ph); - - - /* Search loaded objects' symbol tables for a definition of the symbol -diff --git a/sysdeps/generic/unwind.h b/sysdeps/generic/unwind.h -index b667a5b652..c229603af3 100644 ---- a/sysdeps/generic/unwind.h -+++ b/sysdeps/generic/unwind.h -@@ -75,15 +75,21 @@ typedef void (*_Unwind_Exception_Cleanup_Fn) (_Unwind_Reason_Code, - - struct _Unwind_Exception - { -- _Unwind_Exception_Class exception_class; -- _Unwind_Exception_Cleanup_Fn exception_cleanup; -- _Unwind_Word private_1; -- _Unwind_Word private_2; -- -- /* @@@ The IA-64 ABI says that this structure must be double-word aligned. -- Taking that literally does not make much sense generically. Instead we -- provide the maximum alignment required by any type for the machine. */ --} __attribute__((__aligned__)); -+ union -+ { -+ struct -+ { -+ _Unwind_Exception_Class exception_class; -+ _Unwind_Exception_Cleanup_Fn exception_cleanup; -+ _Unwind_Word private_1; -+ _Unwind_Word private_2; -+ }; -+ -+ /* The IA-64 ABI says that this structure must be double-word aligned. */ -+ _Unwind_Word unwind_exception_align[2] -+ __attribute__ ((__aligned__ (2 * sizeof (_Unwind_Word)))); -+ }; -+}; - - - /* The ACTIONS argument to the personality routine is a bitwise OR of one -diff --git a/sysdeps/gnu/errlist.h b/sysdeps/gnu/errlist.h -index 5d11ed723d..6329e5f393 100644 ---- a/sysdeps/gnu/errlist.h -+++ b/sysdeps/gnu/errlist.h -@@ -1,24 +1,21 @@ --#ifndef ERR_MAP --#define ERR_MAP(value) value --#endif --_S(ERR_MAP(0), N_("Success")) -+_S(0, N_("Success")) - #ifdef EPERM - /* - TRANS Only the owner of the file (or other resource) - TRANS or processes with special privileges can perform the operation. 
*/ --_S(ERR_MAP(EPERM), N_("Operation not permitted")) -+_S(EPERM, N_("Operation not permitted")) - #endif - #ifdef ENOENT - /* - TRANS This is a ``file doesn't exist'' error - TRANS for ordinary files that are referenced in contexts where they are - TRANS expected to already exist. */ --_S(ERR_MAP(ENOENT), N_("No such file or directory")) -+_S(ENOENT, N_("No such file or directory")) - #endif - #ifdef ESRCH - /* - TRANS No process matches the specified process ID. */ --_S(ERR_MAP(ESRCH), N_("No such process")) -+_S(ESRCH, N_("No such process")) - #endif - #ifdef EINTR - /* -@@ -29,12 +26,12 @@ TRANS - TRANS You can choose to have functions resume after a signal that is handled, - TRANS rather than failing with @code{EINTR}; see @ref{Interrupted - TRANS Primitives}. */ --_S(ERR_MAP(EINTR), N_("Interrupted system call")) -+_S(EINTR, N_("Interrupted system call")) - #endif - #ifdef EIO - /* - TRANS Usually used for physical read or write errors. */ --_S(ERR_MAP(EIO), N_("Input/output error")) -+_S(EIO, N_("Input/output error")) - #endif - #ifdef ENXIO - /* -@@ -43,7 +40,7 @@ TRANS represented by a file you specified, and it couldn't find the device. - TRANS This can mean that the device file was installed incorrectly, or that - TRANS the physical device is missing or not correctly attached to the - TRANS computer. */ --_S(ERR_MAP(ENXIO), N_("No such device or address")) -+_S(ENXIO, N_("No such device or address")) - #endif - #ifdef E2BIG - /* -@@ -51,27 +48,27 @@ TRANS Used when the arguments passed to a new program - TRANS being executed with one of the @code{exec} functions (@pxref{Executing a - TRANS File}) occupy too much memory space. This condition never arises on - TRANS @gnuhurdsystems{}. */ --_S(ERR_MAP(E2BIG), N_("Argument list too long")) -+_S(E2BIG, N_("Argument list too long")) - #endif - #ifdef ENOEXEC - /* - TRANS Invalid executable file format. This condition is detected by the - TRANS @code{exec} functions; see @ref{Executing a File}. 
*/ --_S(ERR_MAP(ENOEXEC), N_("Exec format error")) -+_S(ENOEXEC, N_("Exec format error")) - #endif - #ifdef EBADF - /* - TRANS For example, I/O on a descriptor that has been - TRANS closed or reading from a descriptor open only for writing (or vice - TRANS versa). */ --_S(ERR_MAP(EBADF), N_("Bad file descriptor")) -+_S(EBADF, N_("Bad file descriptor")) - #endif - #ifdef ECHILD - /* - TRANS This error happens on operations that are - TRANS supposed to manipulate child processes, when there aren't any processes - TRANS to manipulate. */ --_S(ERR_MAP(ECHILD), N_("No child processes")) -+_S(ECHILD, N_("No child processes")) - #endif - #ifdef EDEADLK - /* -@@ -79,74 +76,74 @@ TRANS Allocating a system resource would have resulted in a - TRANS deadlock situation. The system does not guarantee that it will notice - TRANS all such situations. This error means you got lucky and the system - TRANS noticed; it might just hang. @xref{File Locks}, for an example. */ --_S(ERR_MAP(EDEADLK), N_("Resource deadlock avoided")) -+_S(EDEADLK, N_("Resource deadlock avoided")) - #endif - #ifdef ENOMEM - /* - TRANS The system cannot allocate more virtual memory - TRANS because its capacity is full. */ --_S(ERR_MAP(ENOMEM), N_("Cannot allocate memory")) -+_S(ENOMEM, N_("Cannot allocate memory")) - #endif - #ifdef EACCES - /* - TRANS The file permissions do not allow the attempted operation. */ --_S(ERR_MAP(EACCES), N_("Permission denied")) -+_S(EACCES, N_("Permission denied")) - #endif - #ifdef EFAULT - /* - TRANS An invalid pointer was detected. - TRANS On @gnuhurdsystems{}, this error never happens; you get a signal instead. */ --_S(ERR_MAP(EFAULT), N_("Bad address")) -+_S(EFAULT, N_("Bad address")) - #endif - #ifdef ENOTBLK - /* - TRANS A file that isn't a block special file was given in a situation that - TRANS requires one. For example, trying to mount an ordinary file as a file - TRANS system in Unix gives this error. 
*/ --_S(ERR_MAP(ENOTBLK), N_("Block device required")) -+_S(ENOTBLK, N_("Block device required")) - #endif - #ifdef EBUSY - /* - TRANS A system resource that can't be shared is already in use. - TRANS For example, if you try to delete a file that is the root of a currently - TRANS mounted filesystem, you get this error. */ --_S(ERR_MAP(EBUSY), N_("Device or resource busy")) -+_S(EBUSY, N_("Device or resource busy")) - #endif - #ifdef EEXIST - /* - TRANS An existing file was specified in a context where it only - TRANS makes sense to specify a new file. */ --_S(ERR_MAP(EEXIST), N_("File exists")) -+_S(EEXIST, N_("File exists")) - #endif - #ifdef EXDEV - /* - TRANS An attempt to make an improper link across file systems was detected. - TRANS This happens not only when you use @code{link} (@pxref{Hard Links}) but - TRANS also when you rename a file with @code{rename} (@pxref{Renaming Files}). */ --_S(ERR_MAP(EXDEV), N_("Invalid cross-device link")) -+_S(EXDEV, N_("Invalid cross-device link")) - #endif - #ifdef ENODEV - /* - TRANS The wrong type of device was given to a function that expects a - TRANS particular sort of device. */ --_S(ERR_MAP(ENODEV), N_("No such device")) -+_S(ENODEV, N_("No such device")) - #endif - #ifdef ENOTDIR - /* - TRANS A file that isn't a directory was specified when a directory is required. */ --_S(ERR_MAP(ENOTDIR), N_("Not a directory")) -+_S(ENOTDIR, N_("Not a directory")) - #endif - #ifdef EISDIR - /* - TRANS You cannot open a directory for writing, - TRANS or create or remove hard links to it. */ --_S(ERR_MAP(EISDIR), N_("Is a directory")) -+_S(EISDIR, N_("Is a directory")) - #endif - #ifdef EINVAL - /* - TRANS This is used to indicate various kinds of problems - TRANS with passing the wrong argument to a library function. 
*/ --_S(ERR_MAP(EINVAL), N_("Invalid argument")) -+_S(EINVAL, N_("Invalid argument")) - #endif - #ifdef EMFILE - /* -@@ -157,20 +154,20 @@ TRANS In BSD and GNU, the number of open files is controlled by a resource - TRANS limit that can usually be increased. If you get this error, you might - TRANS want to increase the @code{RLIMIT_NOFILE} limit or make it unlimited; - TRANS @pxref{Limits on Resources}. */ --_S(ERR_MAP(EMFILE), N_("Too many open files")) -+_S(EMFILE, N_("Too many open files")) - #endif - #ifdef ENFILE - /* - TRANS There are too many distinct file openings in the entire system. Note - TRANS that any number of linked channels count as just one file opening; see - TRANS @ref{Linked Channels}. This error never occurs on @gnuhurdsystems{}. */ --_S(ERR_MAP(ENFILE), N_("Too many open files in system")) -+_S(ENFILE, N_("Too many open files in system")) - #endif - #ifdef ENOTTY - /* - TRANS Inappropriate I/O control operation, such as trying to set terminal - TRANS modes on an ordinary file. */ --_S(ERR_MAP(ENOTTY), N_("Inappropriate ioctl for device")) -+_S(ENOTTY, N_("Inappropriate ioctl for device")) - #endif - #ifdef ETXTBSY - /* -@@ -179,35 +176,35 @@ TRANS write to a file that is currently being executed. Often using a - TRANS debugger to run a program is considered having it open for writing and - TRANS will cause this error. (The name stands for ``text file busy''.) This - TRANS is not an error on @gnuhurdsystems{}; the text is copied as necessary. */ --_S(ERR_MAP(ETXTBSY), N_("Text file busy")) -+_S(ETXTBSY, N_("Text file busy")) - #endif - #ifdef EFBIG - /* - TRANS The size of a file would be larger than allowed by the system. */ --_S(ERR_MAP(EFBIG), N_("File too large")) -+_S(EFBIG, N_("File too large")) - #endif - #ifdef ENOSPC - /* - TRANS Write operation on a file failed because the - TRANS disk is full. 
*/ --_S(ERR_MAP(ENOSPC), N_("No space left on device")) -+_S(ENOSPC, N_("No space left on device")) - #endif - #ifdef ESPIPE - /* - TRANS Invalid seek operation (such as on a pipe). */ --_S(ERR_MAP(ESPIPE), N_("Illegal seek")) -+_S(ESPIPE, N_("Illegal seek")) - #endif - #ifdef EROFS - /* - TRANS An attempt was made to modify something on a read-only file system. */ --_S(ERR_MAP(EROFS), N_("Read-only file system")) -+_S(EROFS, N_("Read-only file system")) - #endif - #ifdef EMLINK - /* - TRANS The link count of a single file would become too large. - TRANS @code{rename} can cause this error if the file being renamed already has - TRANS as many links as it can take (@pxref{Renaming Files}). */ --_S(ERR_MAP(EMLINK), N_("Too many links")) -+_S(EMLINK, N_("Too many links")) - #endif - #ifdef EPIPE - /* -@@ -216,19 +213,19 @@ TRANS Every library function that returns this error code also generates a - TRANS @code{SIGPIPE} signal; this signal terminates the program if not handled - TRANS or blocked. Thus, your program will never actually see @code{EPIPE} - TRANS unless it has handled or blocked @code{SIGPIPE}. */ --_S(ERR_MAP(EPIPE), N_("Broken pipe")) -+_S(EPIPE, N_("Broken pipe")) - #endif - #ifdef EDOM - /* - TRANS Used by mathematical functions when an argument value does - TRANS not fall into the domain over which the function is defined. */ --_S(ERR_MAP(EDOM), N_("Numerical argument out of domain")) -+_S(EDOM, N_("Numerical argument out of domain")) - #endif - #ifdef ERANGE - /* - TRANS Used by mathematical functions when the result value is - TRANS not representable because of overflow or underflow. */ --_S(ERR_MAP(ERANGE), N_("Numerical result out of range")) -+_S(ERANGE, N_("Numerical result out of range")) - #endif - #ifdef EAGAIN - /* -@@ -261,7 +258,7 @@ TRANS Such shortages are usually fairly serious and affect the whole system, - TRANS so usually an interactive program should report the error to the user - TRANS and return to its command loop. 
- TRANS @end itemize */ --_S(ERR_MAP(EAGAIN), N_("Resource temporarily unavailable")) -+_S(EAGAIN, N_("Resource temporarily unavailable")) - #endif - #ifdef EINPROGRESS - /* -@@ -273,47 +270,47 @@ TRANS the operation has begun and will take some time. Attempts to manipulate - TRANS the object before the call completes return @code{EALREADY}. You can - TRANS use the @code{select} function to find out when the pending operation - TRANS has completed; @pxref{Waiting for I/O}. */ --_S(ERR_MAP(EINPROGRESS), N_("Operation now in progress")) -+_S(EINPROGRESS, N_("Operation now in progress")) - #endif - #ifdef EALREADY - /* - TRANS An operation is already in progress on an object that has non-blocking - TRANS mode selected. */ --_S(ERR_MAP(EALREADY), N_("Operation already in progress")) -+_S(EALREADY, N_("Operation already in progress")) - #endif - #ifdef ENOTSOCK - /* - TRANS A file that isn't a socket was specified when a socket is required. */ --_S(ERR_MAP(ENOTSOCK), N_("Socket operation on non-socket")) -+_S(ENOTSOCK, N_("Socket operation on non-socket")) - #endif - #ifdef EMSGSIZE - /* - TRANS The size of a message sent on a socket was larger than the supported - TRANS maximum size. */ --_S(ERR_MAP(EMSGSIZE), N_("Message too long")) -+_S(EMSGSIZE, N_("Message too long")) - #endif - #ifdef EPROTOTYPE - /* - TRANS The socket type does not support the requested communications protocol. */ --_S(ERR_MAP(EPROTOTYPE), N_("Protocol wrong type for socket")) -+_S(EPROTOTYPE, N_("Protocol wrong type for socket")) - #endif - #ifdef ENOPROTOOPT - /* - TRANS You specified a socket option that doesn't make sense for the - TRANS particular protocol being used by the socket. @xref{Socket Options}. 
*/ --_S(ERR_MAP(ENOPROTOOPT), N_("Protocol not available")) -+_S(ENOPROTOOPT, N_("Protocol not available")) - #endif - #ifdef EPROTONOSUPPORT - /* - TRANS The socket domain does not support the requested communications protocol - TRANS (perhaps because the requested protocol is completely invalid). - TRANS @xref{Creating a Socket}. */ --_S(ERR_MAP(EPROTONOSUPPORT), N_("Protocol not supported")) -+_S(EPROTONOSUPPORT, N_("Protocol not supported")) - #endif - #ifdef ESOCKTNOSUPPORT - /* - TRANS The socket type is not supported. */ --_S(ERR_MAP(ESOCKTNOSUPPORT), N_("Socket type not supported")) -+_S(ESOCKTNOSUPPORT, N_("Socket type not supported")) - #endif - #ifdef EOPNOTSUPP - /* -@@ -323,71 +320,71 @@ TRANS implemented for all communications protocols. On @gnuhurdsystems{}, this - TRANS error can happen for many calls when the object does not support the - TRANS particular operation; it is a generic indication that the server knows - TRANS nothing to do for that call. */ --_S(ERR_MAP(EOPNOTSUPP), N_("Operation not supported")) -+_S(EOPNOTSUPP, N_("Operation not supported")) - #endif - #ifdef EPFNOSUPPORT - /* - TRANS The socket communications protocol family you requested is not supported. */ --_S(ERR_MAP(EPFNOSUPPORT), N_("Protocol family not supported")) -+_S(EPFNOSUPPORT, N_("Protocol family not supported")) - #endif - #ifdef EAFNOSUPPORT - /* - TRANS The address family specified for a socket is not supported; it is - TRANS inconsistent with the protocol being used on the socket. @xref{Sockets}. */ --_S(ERR_MAP(EAFNOSUPPORT), N_("Address family not supported by protocol")) -+_S(EAFNOSUPPORT, N_("Address family not supported by protocol")) - #endif - #ifdef EADDRINUSE - /* - TRANS The requested socket address is already in use. @xref{Socket Addresses}. 
*/ --_S(ERR_MAP(EADDRINUSE), N_("Address already in use")) -+_S(EADDRINUSE, N_("Address already in use")) - #endif - #ifdef EADDRNOTAVAIL - /* - TRANS The requested socket address is not available; for example, you tried - TRANS to give a socket a name that doesn't match the local host name. - TRANS @xref{Socket Addresses}. */ --_S(ERR_MAP(EADDRNOTAVAIL), N_("Cannot assign requested address")) -+_S(EADDRNOTAVAIL, N_("Cannot assign requested address")) - #endif - #ifdef ENETDOWN - /* - TRANS A socket operation failed because the network was down. */ --_S(ERR_MAP(ENETDOWN), N_("Network is down")) -+_S(ENETDOWN, N_("Network is down")) - #endif - #ifdef ENETUNREACH - /* - TRANS A socket operation failed because the subnet containing the remote host - TRANS was unreachable. */ --_S(ERR_MAP(ENETUNREACH), N_("Network is unreachable")) -+_S(ENETUNREACH, N_("Network is unreachable")) - #endif - #ifdef ENETRESET - /* - TRANS A network connection was reset because the remote host crashed. */ --_S(ERR_MAP(ENETRESET), N_("Network dropped connection on reset")) -+_S(ENETRESET, N_("Network dropped connection on reset")) - #endif - #ifdef ECONNABORTED - /* - TRANS A network connection was aborted locally. */ --_S(ERR_MAP(ECONNABORTED), N_("Software caused connection abort")) -+_S(ECONNABORTED, N_("Software caused connection abort")) - #endif - #ifdef ECONNRESET - /* - TRANS A network connection was closed for reasons outside the control of the - TRANS local host, such as by the remote machine rebooting or an unrecoverable - TRANS protocol violation. */ --_S(ERR_MAP(ECONNRESET), N_("Connection reset by peer")) -+_S(ECONNRESET, N_("Connection reset by peer")) - #endif - #ifdef ENOBUFS - /* - TRANS The kernel's buffers for I/O operations are all in use. In GNU, this - TRANS error is always synonymous with @code{ENOMEM}; you may get one or the - TRANS other from network operations. 
*/ --_S(ERR_MAP(ENOBUFS), N_("No buffer space available")) -+_S(ENOBUFS, N_("No buffer space available")) - #endif - #ifdef EISCONN - /* - TRANS You tried to connect a socket that is already connected. - TRANS @xref{Connecting}. */ --_S(ERR_MAP(EISCONN), N_("Transport endpoint is already connected")) -+_S(EISCONN, N_("Transport endpoint is already connected")) - #endif - #ifdef ENOTCONN - /* -@@ -395,74 +392,74 @@ TRANS The socket is not connected to anything. You get this error when you - TRANS try to transmit data over a socket, without first specifying a - TRANS destination for the data. For a connectionless socket (for datagram - TRANS protocols, such as UDP), you get @code{EDESTADDRREQ} instead. */ --_S(ERR_MAP(ENOTCONN), N_("Transport endpoint is not connected")) -+_S(ENOTCONN, N_("Transport endpoint is not connected")) - #endif - #ifdef EDESTADDRREQ - /* - TRANS No default destination address was set for the socket. You get this - TRANS error when you try to transmit data over a connectionless socket, - TRANS without first specifying a destination for the data with @code{connect}. */ --_S(ERR_MAP(EDESTADDRREQ), N_("Destination address required")) -+_S(EDESTADDRREQ, N_("Destination address required")) - #endif - #ifdef ESHUTDOWN - /* - TRANS The socket has already been shut down. */ --_S(ERR_MAP(ESHUTDOWN), N_("Cannot send after transport endpoint shutdown")) -+_S(ESHUTDOWN, N_("Cannot send after transport endpoint shutdown")) - #endif - #ifdef ETOOMANYREFS --_S(ERR_MAP(ETOOMANYREFS), N_("Too many references: cannot splice")) -+_S(ETOOMANYREFS, N_("Too many references: cannot splice")) - #endif - #ifdef ETIMEDOUT - /* - TRANS A socket operation with a specified timeout received no response during - TRANS the timeout period. 
*/ --_S(ERR_MAP(ETIMEDOUT), N_("Connection timed out")) -+_S(ETIMEDOUT, N_("Connection timed out")) - #endif - #ifdef ECONNREFUSED - /* - TRANS A remote host refused to allow the network connection (typically because - TRANS it is not running the requested service). */ --_S(ERR_MAP(ECONNREFUSED), N_("Connection refused")) -+_S(ECONNREFUSED, N_("Connection refused")) - #endif - #ifdef ELOOP - /* - TRANS Too many levels of symbolic links were encountered in looking up a file name. - TRANS This often indicates a cycle of symbolic links. */ --_S(ERR_MAP(ELOOP), N_("Too many levels of symbolic links")) -+_S(ELOOP, N_("Too many levels of symbolic links")) - #endif - #ifdef ENAMETOOLONG - /* - TRANS Filename too long (longer than @code{PATH_MAX}; @pxref{Limits for - TRANS Files}) or host name too long (in @code{gethostname} or - TRANS @code{sethostname}; @pxref{Host Identification}). */ --_S(ERR_MAP(ENAMETOOLONG), N_("File name too long")) -+_S(ENAMETOOLONG, N_("File name too long")) - #endif - #ifdef EHOSTDOWN - /* - TRANS The remote host for a requested network connection is down. */ --_S(ERR_MAP(EHOSTDOWN), N_("Host is down")) -+_S(EHOSTDOWN, N_("Host is down")) - #endif - /* - TRANS The remote host for a requested network connection is not reachable. */ - #ifdef EHOSTUNREACH --_S(ERR_MAP(EHOSTUNREACH), N_("No route to host")) -+_S(EHOSTUNREACH, N_("No route to host")) - #endif - #ifdef ENOTEMPTY - /* - TRANS Directory not empty, where an empty directory was expected. Typically, - TRANS this error occurs when you are trying to delete a directory. */ --_S(ERR_MAP(ENOTEMPTY), N_("Directory not empty")) -+_S(ENOTEMPTY, N_("Directory not empty")) - #endif - #ifdef EUSERS - /* - TRANS The file quota system is confused because there are too many users. - TRANS @c This can probably happen in a GNU system when using NFS. */ --_S(ERR_MAP(EUSERS), N_("Too many users")) -+_S(EUSERS, N_("Too many users")) - #endif - #ifdef EDQUOT - /* - TRANS The user's disk quota was exceeded. 
*/ --_S(ERR_MAP(EDQUOT), N_("Disk quota exceeded")) -+_S(EDQUOT, N_("Disk quota exceeded")) - #endif - #ifdef ESTALE - /* -@@ -471,7 +468,7 @@ TRANS file system which is due to file system rearrangements on the server host - TRANS for NFS file systems or corruption in other file systems. - TRANS Repairing this condition usually requires unmounting, possibly repairing - TRANS and remounting the file system. */ --_S(ERR_MAP(ESTALE), N_("Stale file handle")) -+_S(ESTALE, N_("Stale file handle")) - #endif - #ifdef EREMOTE - /* -@@ -479,7 +476,7 @@ TRANS An attempt was made to NFS-mount a remote file system with a file name tha - TRANS already specifies an NFS-mounted file. - TRANS (This is an error on some operating systems, but we expect it to work - TRANS properly on @gnuhurdsystems{}, making this error code impossible.) */ --_S(ERR_MAP(EREMOTE), N_("Object is remote")) -+_S(EREMOTE, N_("Object is remote")) - #endif - #ifdef ENOLCK - /* -@@ -487,7 +484,7 @@ TRANS This is used by the file locking facilities; see - TRANS @ref{File Locks}. This error is never generated by @gnuhurdsystems{}, but - TRANS it can result from an operation to an NFS server running another - TRANS operating system. */ --_S(ERR_MAP(ENOLCK), N_("No locks available")) -+_S(ENOLCK, N_("No locks available")) - #endif - #ifdef ENOSYS - /* -@@ -496,46 +493,46 @@ TRANS not implemented at all, either in the C library itself or in the - TRANS operating system. When you get this error, you can be sure that this - TRANS particular function will always fail with @code{ENOSYS} unless you - TRANS install a new version of the C library or the operating system. */ --_S(ERR_MAP(ENOSYS), N_("Function not implemented")) -+_S(ENOSYS, N_("Function not implemented")) - #endif - #ifdef EILSEQ - /* - TRANS While decoding a multibyte character the function came along an invalid - TRANS or an incomplete sequence of bytes or the given wide character is invalid. 
*/ --_S(ERR_MAP(EILSEQ), N_("Invalid or incomplete multibyte or wide character")) -+_S(EILSEQ, N_("Invalid or incomplete multibyte or wide character")) - #endif - #ifdef EBADMSG --_S(ERR_MAP(EBADMSG), N_("Bad message")) -+_S(EBADMSG, N_("Bad message")) - #endif - #ifdef EIDRM --_S(ERR_MAP(EIDRM), N_("Identifier removed")) -+_S(EIDRM, N_("Identifier removed")) - #endif - #ifdef EMULTIHOP --_S(ERR_MAP(EMULTIHOP), N_("Multihop attempted")) -+_S(EMULTIHOP, N_("Multihop attempted")) - #endif - #ifdef ENODATA --_S(ERR_MAP(ENODATA), N_("No data available")) -+_S(ENODATA, N_("No data available")) - #endif - #ifdef ENOLINK --_S(ERR_MAP(ENOLINK), N_("Link has been severed")) -+_S(ENOLINK, N_("Link has been severed")) - #endif - #ifdef ENOMSG --_S(ERR_MAP(ENOMSG), N_("No message of desired type")) -+_S(ENOMSG, N_("No message of desired type")) - #endif - #ifdef ENOSR --_S(ERR_MAP(ENOSR), N_("Out of streams resources")) -+_S(ENOSR, N_("Out of streams resources")) - #endif - #ifdef ENOSTR --_S(ERR_MAP(ENOSTR), N_("Device not a stream")) -+_S(ENOSTR, N_("Device not a stream")) - #endif - #ifdef EOVERFLOW --_S(ERR_MAP(EOVERFLOW), N_("Value too large for defined data type")) -+_S(EOVERFLOW, N_("Value too large for defined data type")) - #endif - #ifdef EPROTO --_S(ERR_MAP(EPROTO), N_("Protocol error")) -+_S(EPROTO, N_("Protocol error")) - #endif - #ifdef ETIME --_S(ERR_MAP(ETIME), N_("Timer expired")) -+_S(ETIME, N_("Timer expired")) - #endif - #ifdef ECANCELED - /* -@@ -543,148 +540,148 @@ TRANS An asynchronous operation was canceled before it - TRANS completed. @xref{Asynchronous I/O}. When you call @code{aio_cancel}, - TRANS the normal result is for the operations affected to complete with this - TRANS error; @pxref{Cancel AIO Operations}. 
*/ --_S(ERR_MAP(ECANCELED), N_("Operation canceled")) -+_S(ECANCELED, N_("Operation canceled")) - #endif - #ifdef EOWNERDEAD --_S(ERR_MAP(EOWNERDEAD), N_("Owner died")) -+_S(EOWNERDEAD, N_("Owner died")) - #endif - #ifdef ENOTRECOVERABLE --_S(ERR_MAP(ENOTRECOVERABLE), N_("State not recoverable")) -+_S(ENOTRECOVERABLE, N_("State not recoverable")) - #endif - #ifdef ERESTART --_S(ERR_MAP(ERESTART), N_("Interrupted system call should be restarted")) -+_S(ERESTART, N_("Interrupted system call should be restarted")) - #endif - #ifdef ECHRNG --_S(ERR_MAP(ECHRNG), N_("Channel number out of range")) -+_S(ECHRNG, N_("Channel number out of range")) - #endif - #ifdef EL2NSYNC --_S(ERR_MAP(EL2NSYNC), N_("Level 2 not synchronized")) -+_S(EL2NSYNC, N_("Level 2 not synchronized")) - #endif - #ifdef EL3HLT --_S(ERR_MAP(EL3HLT), N_("Level 3 halted")) -+_S(EL3HLT, N_("Level 3 halted")) - #endif - #ifdef EL3RST --_S(ERR_MAP(EL3RST), N_("Level 3 reset")) -+_S(EL3RST, N_("Level 3 reset")) - #endif - #ifdef ELNRNG --_S(ERR_MAP(ELNRNG), N_("Link number out of range")) -+_S(ELNRNG, N_("Link number out of range")) - #endif - #ifdef EUNATCH --_S(ERR_MAP(EUNATCH), N_("Protocol driver not attached")) -+_S(EUNATCH, N_("Protocol driver not attached")) - #endif - #ifdef ENOCSI --_S(ERR_MAP(ENOCSI), N_("No CSI structure available")) -+_S(ENOCSI, N_("No CSI structure available")) - #endif - #ifdef EL2HLT --_S(ERR_MAP(EL2HLT), N_("Level 2 halted")) -+_S(EL2HLT, N_("Level 2 halted")) - #endif - #ifdef EBADE --_S(ERR_MAP(EBADE), N_("Invalid exchange")) -+_S(EBADE, N_("Invalid exchange")) - #endif - #ifdef EBADR --_S(ERR_MAP(EBADR), N_("Invalid request descriptor")) -+_S(EBADR, N_("Invalid request descriptor")) - #endif - #ifdef EXFULL --_S(ERR_MAP(EXFULL), N_("Exchange full")) -+_S(EXFULL, N_("Exchange full")) - #endif - #ifdef ENOANO --_S(ERR_MAP(ENOANO), N_("No anode")) -+_S(ENOANO, N_("No anode")) - #endif - #ifdef EBADRQC --_S(ERR_MAP(EBADRQC), N_("Invalid request code")) -+_S(EBADRQC, 
N_("Invalid request code")) - #endif - #ifdef EBADSLT --_S(ERR_MAP(EBADSLT), N_("Invalid slot")) -+_S(EBADSLT, N_("Invalid slot")) - #endif - #ifdef EBFONT --_S(ERR_MAP(EBFONT), N_("Bad font file format")) -+_S(EBFONT, N_("Bad font file format")) - #endif - #ifdef ENONET --_S(ERR_MAP(ENONET), N_("Machine is not on the network")) -+_S(ENONET, N_("Machine is not on the network")) - #endif - #ifdef ENOPKG --_S(ERR_MAP(ENOPKG), N_("Package not installed")) -+_S(ENOPKG, N_("Package not installed")) - #endif - #ifdef EADV --_S(ERR_MAP(EADV), N_("Advertise error")) -+_S(EADV, N_("Advertise error")) - #endif - #ifdef ESRMNT --_S(ERR_MAP(ESRMNT), N_("Srmount error")) -+_S(ESRMNT, N_("Srmount error")) - #endif - #ifdef ECOMM --_S(ERR_MAP(ECOMM), N_("Communication error on send")) -+_S(ECOMM, N_("Communication error on send")) - #endif - #ifdef EDOTDOT --_S(ERR_MAP(EDOTDOT), N_("RFS specific error")) -+_S(EDOTDOT, N_("RFS specific error")) - #endif - #ifdef ENOTUNIQ --_S(ERR_MAP(ENOTUNIQ), N_("Name not unique on network")) -+_S(ENOTUNIQ, N_("Name not unique on network")) - #endif - #ifdef EBADFD --_S(ERR_MAP(EBADFD), N_("File descriptor in bad state")) -+_S(EBADFD, N_("File descriptor in bad state")) - #endif - #ifdef EREMCHG --_S(ERR_MAP(EREMCHG), N_("Remote address changed")) -+_S(EREMCHG, N_("Remote address changed")) - #endif - #ifdef ELIBACC --_S(ERR_MAP(ELIBACC), N_("Can not access a needed shared library")) -+_S(ELIBACC, N_("Can not access a needed shared library")) - #endif - #ifdef ELIBBAD --_S(ERR_MAP(ELIBBAD), N_("Accessing a corrupted shared library")) -+_S(ELIBBAD, N_("Accessing a corrupted shared library")) - #endif - #ifdef ELIBSCN --_S(ERR_MAP(ELIBSCN), N_(".lib section in a.out corrupted")) -+_S(ELIBSCN, N_(".lib section in a.out corrupted")) - #endif - #ifdef ELIBMAX --_S(ERR_MAP(ELIBMAX), N_("Attempting to link in too many shared libraries")) -+_S(ELIBMAX, N_("Attempting to link in too many shared libraries")) - #endif - #ifdef ELIBEXEC 
--_S(ERR_MAP(ELIBEXEC), N_("Cannot exec a shared library directly")) -+_S(ELIBEXEC, N_("Cannot exec a shared library directly")) - #endif - #ifdef ESTRPIPE --_S(ERR_MAP(ESTRPIPE), N_("Streams pipe error")) -+_S(ESTRPIPE, N_("Streams pipe error")) - #endif - #ifdef EUCLEAN --_S(ERR_MAP(EUCLEAN), N_("Structure needs cleaning")) -+_S(EUCLEAN, N_("Structure needs cleaning")) - #endif - #ifdef ENOTNAM --_S(ERR_MAP(ENOTNAM), N_("Not a XENIX named type file")) -+_S(ENOTNAM, N_("Not a XENIX named type file")) - #endif - #ifdef ENAVAIL --_S(ERR_MAP(ENAVAIL), N_("No XENIX semaphores available")) -+_S(ENAVAIL, N_("No XENIX semaphores available")) - #endif - #ifdef EISNAM --_S(ERR_MAP(EISNAM), N_("Is a named type file")) -+_S(EISNAM, N_("Is a named type file")) - #endif - #ifdef EREMOTEIO --_S(ERR_MAP(EREMOTEIO), N_("Remote I/O error")) -+_S(EREMOTEIO, N_("Remote I/O error")) - #endif - #ifdef ENOMEDIUM --_S(ERR_MAP(ENOMEDIUM), N_("No medium found")) -+_S(ENOMEDIUM, N_("No medium found")) - #endif - #ifdef EMEDIUMTYPE --_S(ERR_MAP(EMEDIUMTYPE), N_("Wrong medium type")) -+_S(EMEDIUMTYPE, N_("Wrong medium type")) - #endif - #ifdef ENOKEY --_S(ERR_MAP(ENOKEY), N_("Required key not available")) -+_S(ENOKEY, N_("Required key not available")) - #endif - #ifdef EKEYEXPIRED --_S(ERR_MAP(EKEYEXPIRED), N_("Key has expired")) -+_S(EKEYEXPIRED, N_("Key has expired")) - #endif - #ifdef EKEYREVOKED --_S(ERR_MAP(EKEYREVOKED), N_("Key has been revoked")) -+_S(EKEYREVOKED, N_("Key has been revoked")) - #endif - #ifdef EKEYREJECTED --_S(ERR_MAP(EKEYREJECTED), N_("Key was rejected by service")) -+_S(EKEYREJECTED, N_("Key was rejected by service")) - #endif - #ifdef ERFKILL --_S(ERR_MAP(ERFKILL), N_("Operation not possible due to RF-kill")) -+_S(ERFKILL, N_("Operation not possible due to RF-kill")) - #endif - #ifdef EHWPOISON --_S(ERR_MAP(EHWPOISON), N_("Memory page has hardware error")) -+_S(EHWPOISON, N_("Memory page has hardware error")) - #endif - #ifdef EBADRPC --_S(ERR_MAP(EBADRPC), N_("RPC 
struct is bad")) -+_S(EBADRPC, N_("RPC struct is bad")) - #endif - #ifdef EFTYPE - /* -@@ -693,40 +690,40 @@ TRANS operation, or a data file had the wrong format. - TRANS - TRANS On some systems @code{chmod} returns this error if you try to set the - TRANS sticky bit on a non-directory file; @pxref{Setting Permissions}. */ --_S(ERR_MAP(EFTYPE), N_("Inappropriate file type or format")) -+_S(EFTYPE, N_("Inappropriate file type or format")) - #endif - #ifdef EPROCUNAVAIL --_S(ERR_MAP(EPROCUNAVAIL), N_("RPC bad procedure for program")) -+_S(EPROCUNAVAIL, N_("RPC bad procedure for program")) - #endif - #ifdef EAUTH --_S(ERR_MAP(EAUTH), N_("Authentication error")) -+_S(EAUTH, N_("Authentication error")) - #endif - #ifdef EDIED - /* - TRANS On @gnuhurdsystems{}, opening a file returns this error when the file is - TRANS translated by a program and the translator program dies while starting - TRANS up, before it has connected to the file. */ --_S(ERR_MAP(EDIED), N_("Translator died")) -+_S(EDIED, N_("Translator died")) - #endif - #ifdef ERPCMISMATCH --_S(ERR_MAP(ERPCMISMATCH), N_("RPC version wrong")) -+_S(ERPCMISMATCH, N_("RPC version wrong")) - #endif - #ifdef EGREGIOUS - /* - TRANS You did @strong{what}? */ --_S(ERR_MAP(EGREGIOUS), N_("You really blew it this time")) -+_S(EGREGIOUS, N_("You really blew it this time")) - #endif - #ifdef EPROCLIM - /* - TRANS This means that the per-user limit on new process would be exceeded by - TRANS an attempted @code{fork}. @xref{Limits on Resources}, for details on - TRANS the @code{RLIMIT_NPROC} limit. */ --_S(ERR_MAP(EPROCLIM), N_("Too many processes")) -+_S(EPROCLIM, N_("Too many processes")) - #endif - #ifdef EGRATUITOUS - /* - TRANS This error code has no purpose. */ --_S(ERR_MAP(EGRATUITOUS), N_("Gratuitous error")) -+_S(EGRATUITOUS, N_("Gratuitous error")) - #endif - #if defined (ENOTSUP) && ENOTSUP != EOPNOTSUPP - /* -@@ -742,10 +739,10 @@ TRANS values. 
- TRANS - TRANS If the entire function is not available at all in the implementation, - TRANS it returns @code{ENOSYS} instead. */ --_S(ERR_MAP(ENOTSUP), N_("Not supported")) -+_S(ENOTSUP, N_("Not supported")) - #endif - #ifdef EPROGMISMATCH --_S(ERR_MAP(EPROGMISMATCH), N_("RPC program version wrong")) -+_S(EPROGMISMATCH, N_("RPC program version wrong")) - #endif - #ifdef EBACKGROUND - /* -@@ -755,7 +752,7 @@ TRANS foreground process group of the terminal. Users do not usually see this - TRANS error because functions such as @code{read} and @code{write} translate - TRANS it into a @code{SIGTTIN} or @code{SIGTTOU} signal. @xref{Job Control}, - TRANS for information on process groups and these signals. */ --_S(ERR_MAP(EBACKGROUND), N_("Inappropriate operation for background process")) -+_S(EBACKGROUND, N_("Inappropriate operation for background process")) - #endif - #ifdef EIEIO - /* -@@ -773,7 +770,7 @@ TRANS @c "bought the farm" means "died". -jtobey - TRANS @c - TRANS @c Translators, please do not translate this litteraly, translate it into - TRANS @c an idiomatic funny way of saying that the computer died. */ --_S(ERR_MAP(EIEIO), N_("Computer bought the farm")) -+_S(EIEIO, N_("Computer bought the farm")) - #endif - #if defined (EWOULDBLOCK) && EWOULDBLOCK != EAGAIN - /* -@@ -782,18 +779,18 @@ TRANS The values are always the same, on every operating system. - TRANS - TRANS C libraries in many older Unix systems have @code{EWOULDBLOCK} as a - TRANS separate error code. */ --_S(ERR_MAP(EWOULDBLOCK), N_("Operation would block")) -+_S(EWOULDBLOCK, N_("Operation would block")) - #endif - #ifdef ENEEDAUTH --_S(ERR_MAP(ENEEDAUTH), N_("Need authenticator")) -+_S(ENEEDAUTH, N_("Need authenticator")) - #endif - #ifdef ED - /* - TRANS The experienced user will know what is wrong. - TRANS @c This error code is a joke. Its perror text is part of the joke. - TRANS @c Don't change it. 
*/ --_S(ERR_MAP(ED), N_("?")) -+_S(ED, N_("?")) - #endif - #ifdef EPROGUNAVAIL --_S(ERR_MAP(EPROGUNAVAIL), N_("RPC program not available")) -+_S(EPROGUNAVAIL, N_("RPC program not available")) - #endif -diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h -index 0f08079e48..672d8f27ce 100644 ---- a/sysdeps/i386/dl-machine.h -+++ b/sysdeps/i386/dl-machine.h -@@ -338,16 +338,22 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc, - { - # ifndef RTLD_BOOTSTRAP - if (sym_map != map -- && sym_map->l_type != lt_executable - && !sym_map->l_relocated) - { - const char *strtab - = (const char *) D_PTR (map, l_info[DT_STRTAB]); -- _dl_error_printf ("\ -+ if (sym_map->l_type == lt_executable) -+ _dl_fatal_printf ("\ -+%s: IFUNC symbol '%s' referenced in '%s' is defined in the executable \ -+and creates an unsatisfiable circular dependency.\n", -+ RTLD_PROGNAME, strtab + refsym->st_name, -+ map->l_name); -+ else -+ _dl_error_printf ("\ - %s: Relink `%s' with `%s' for IFUNC symbol `%s'\n", -- RTLD_PROGNAME, map->l_name, -- sym_map->l_name, -- strtab + refsym->st_name); -+ RTLD_PROGNAME, map->l_name, -+ sym_map->l_name, -+ strtab + refsym->st_name); - } - # endif - value = ((Elf32_Addr (*) (void)) value) (); -diff --git a/sysdeps/powerpc/powerpc64/backtrace.c b/sysdeps/powerpc/powerpc64/backtrace.c -index 8a53a1088f..362a2b713c 100644 ---- a/sysdeps/powerpc/powerpc64/backtrace.c -+++ b/sysdeps/powerpc/powerpc64/backtrace.c -@@ -54,11 +54,22 @@ struct signal_frame_64 { - /* We don't care about the rest, since the IP value is at 'uc' field. */ - }; - -+/* Test if the address match to the inside the trampoline code. -+ Up to and including kernel 5.8, returning from an interrupt or syscall to a -+ signal handler starts execution directly at the handler's entry point, with -+ LR set to address of the sigreturn trampoline (the vDSO symbol). 
-+ Newer kernels will branch to signal handler from the trampoline instead, so -+ checking the stacktrace against the vDSO entrypoint does not work in such -+ case. -+ The vDSO branches with a 'bctrl' instruction, so checking either the -+ vDSO address itself and the next instruction should cover all kernel -+ versions. */ - static inline bool - is_sigtramp_address (void *nip) - { - #ifdef HAVE_SIGTRAMP_RT64 -- if (nip == GLRO (dl_vdso_sigtramp_rt64)) -+ if (nip == GLRO (dl_vdso_sigtramp_rt64) || -+ nip == GLRO (dl_vdso_sigtramp_rt64) + 4) - return true; - #endif - return false; -diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile -index 920d875420..bf9b7f7223 100644 ---- a/sysdeps/pthread/Makefile -+++ b/sysdeps/pthread/Makefile -@@ -107,6 +107,7 @@ tests += tst-cnd-basic tst-mtx-trylock tst-cnd-broadcast \ - tst-unload \ - tst-unwind-thread \ - tst-pt-vfork1 tst-pt-vfork2 tst-vfork1x tst-vfork2x \ -+ tst-pthread-exit-signal \ - - - # Files which must not be linked with libpthread. -diff --git a/sysdeps/pthread/tst-pthread-exit-signal.c b/sysdeps/pthread/tst-pthread-exit-signal.c -new file mode 100644 -index 0000000000..b4526fe663 ---- /dev/null -+++ b/sysdeps/pthread/tst-pthread-exit-signal.c -@@ -0,0 +1,45 @@ -+/* Test that pending signals are not delivered on thread exit (bug 28607). -+ Copyright (C) 2021 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. 
-+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ <https://www.gnu.org/licenses/>. */ -+ -+/* Due to bug 28607, pthread_kill (or pthread_cancel) restored the -+ signal mask during during thread exit, triggering the delivery of a -+ blocked pending signal (SIGUSR1 in this test). */ -+ -+#include <support/xthread.h> -+#include <support/xsignal.h> -+ -+static void * -+threadfunc (void *closure) -+{ -+ sigset_t sigmask; -+ sigfillset (&sigmask); -+ xpthread_sigmask (SIG_SETMASK, &sigmask, NULL); -+ xpthread_kill (pthread_self (), SIGUSR1); -+ pthread_exit (NULL); -+ return NULL; -+} -+ -+static int -+do_test (void) -+{ -+ pthread_t thr = xpthread_create (NULL, threadfunc, NULL); -+ xpthread_join (thr); -+ return 0; -+} -+ -+#include <support/test-driver.c> -diff --git a/sysdeps/s390/configure b/sysdeps/s390/configure -index fa46e9e351..e7f576338d 100644 ---- a/sysdeps/s390/configure -+++ b/sysdeps/s390/configure -@@ -123,7 +123,9 @@ void testinsn (char *buf) - __asm__ (".machine \"arch13\" \n\t" - ".machinemode \"zarch_nohighgprs\" \n\t" - "lghi %%r0,16 \n\t" -- "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); -+ "mvcrl 0(%0),32(%0) \n\t" -+ "vstrs %%v20,%%v20,%%v20,%%v20,0,2" -+ : : "a" (buf) : "memory", "r0"); - } - EOF - if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c -@@ -271,7 +273,9 @@ else - void testinsn (char *buf) - { - __asm__ ("lghi %%r0,16 \n\t" -- "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); -+ "mvcrl 0(%0),32(%0) \n\t" -+ "vstrs %%v20,%%v20,%%v20,%%v20,0,2" -+ : : "a" (buf) : "memory", "r0"); - } - EOF - if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c -diff --git a/sysdeps/s390/configure.ac b/sysdeps/s390/configure.ac -index 3ed5a8ef87..5c3479e8cf 100644 ---- a/sysdeps/s390/configure.ac -+++ b/sysdeps/s390/configure.ac -@@ -88,7 +88,9 @@ void testinsn (char *buf) - __asm__ (".machine \"arch13\" \n\t" - ".machinemode 
\"zarch_nohighgprs\" \n\t" - "lghi %%r0,16 \n\t" -- "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); -+ "mvcrl 0(%0),32(%0) \n\t" -+ "vstrs %%v20,%%v20,%%v20,%%v20,0,2" -+ : : "a" (buf) : "memory", "r0"); - } - EOF - dnl test, if assembler supports S390 arch13 instructions -@@ -195,7 +197,9 @@ cat > conftest.c <<\EOF - void testinsn (char *buf) - { - __asm__ ("lghi %%r0,16 \n\t" -- "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); -+ "mvcrl 0(%0),32(%0) \n\t" -+ "vstrs %%v20,%%v20,%%v20,%%v20,0,2" -+ : : "a" (buf) : "memory", "r0"); - } - EOF - dnl test, if assembler supports S390 arch13 zarch instructions as default -diff --git a/sysdeps/s390/memmove.c b/sysdeps/s390/memmove.c -index 5fc85e129f..ee59b5de14 100644 ---- a/sysdeps/s390/memmove.c -+++ b/sysdeps/s390/memmove.c -@@ -43,7 +43,7 @@ extern __typeof (__redirect_memmove) MEMMOVE_ARCH13 attribute_hidden; - s390_libc_ifunc_expr (__redirect_memmove, memmove, - ({ - s390_libc_ifunc_expr_stfle_init (); -- (HAVE_MEMMOVE_ARCH13 -+ (HAVE_MEMMOVE_ARCH13 && (hwcap & HWCAP_S390_VXRS_EXT2) - && S390_IS_ARCH13_MIE3 (stfle_bits)) - ? 
MEMMOVE_ARCH13 - : (HAVE_MEMMOVE_Z13 && (hwcap & HWCAP_S390_VX)) -diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c -index e6195c6e26..17c0cc3952 100644 ---- a/sysdeps/s390/multiarch/ifunc-impl-list.c -+++ b/sysdeps/s390/multiarch/ifunc-impl-list.c -@@ -171,7 +171,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, - IFUNC_IMPL (i, name, memmove, - # if HAVE_MEMMOVE_ARCH13 - IFUNC_IMPL_ADD (array, i, memmove, -- S390_IS_ARCH13_MIE3 (stfle_bits), -+ ((dl_hwcap & HWCAP_S390_VXRS_EXT2) -+ && S390_IS_ARCH13_MIE3 (stfle_bits)), - MEMMOVE_ARCH13) - # endif - # if HAVE_MEMMOVE_Z13 -diff --git a/sysdeps/sh/be/sh4/fpu/Implies b/sysdeps/sh/be/sh4/fpu/Implies -new file mode 100644 -index 0000000000..71b28ee1a4 ---- /dev/null -+++ b/sysdeps/sh/be/sh4/fpu/Implies -@@ -0,0 +1 @@ -+sh/sh4/fpu -diff --git a/sysdeps/sh/le/sh4/fpu/Implies b/sysdeps/sh/le/sh4/fpu/Implies -new file mode 100644 -index 0000000000..71b28ee1a4 ---- /dev/null -+++ b/sysdeps/sh/le/sh4/fpu/Implies -@@ -0,0 +1 @@ -+sh/sh4/fpu -diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile -index 9b2a253032..34748ffcd1 100644 ---- a/sysdeps/unix/sysv/linux/Makefile -+++ b/sysdeps/unix/sysv/linux/Makefile -@@ -100,7 +100,7 @@ tests += tst-clone tst-clone2 tst-clone3 tst-fanotify tst-personality \ - tst-quota tst-sync_file_range tst-sysconf-iov_max tst-ttyname \ - test-errno-linux tst-memfd_create tst-mlock2 tst-pkey \ - tst-rlimit-infinity tst-ofdlocks tst-gettid tst-gettid-kill \ -- tst-tgkill -+ tst-tgkill tst-sysvsem-linux tst-sysvmsg-linux tst-sysvshm-linux - tests-internal += tst-ofdlocks-compat tst-sigcontext-get_pc - - CFLAGS-tst-sigcontext-get_pc.c = -fasynchronous-unwind-tables -diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h -index fc688450ee..00a4d0c8e7 100644 ---- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h -+++ 
b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h -@@ -54,6 +54,10 @@ - && MIDR_PARTNUM(midr) == 0x000) - #define IS_NEOVERSE_N1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ - && MIDR_PARTNUM(midr) == 0xd0c) -+#define IS_NEOVERSE_N2(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ -+ && MIDR_PARTNUM(midr) == 0xd49) -+#define IS_NEOVERSE_V1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ -+ && MIDR_PARTNUM(midr) == 0xd40) - - #define IS_EMAG(midr) (MIDR_IMPLEMENTOR(midr) == 'P' \ - && MIDR_PARTNUM(midr) == 0x000) -diff --git a/sysdeps/unix/sysv/linux/mq_notify.c b/sysdeps/unix/sysv/linux/mq_notify.c -index 61bbb03b64..2bb98172c8 100644 ---- a/sysdeps/unix/sysv/linux/mq_notify.c -+++ b/sysdeps/unix/sysv/linux/mq_notify.c -@@ -132,9 +132,12 @@ helper_thread (void *arg) - to wait until it is done with it. */ - (void) __pthread_barrier_wait (¬ify_barrier); - } -- else if (data.raw[NOTIFY_COOKIE_LEN - 1] == NOTIFY_REMOVED) -- /* The only state we keep is the copy of the thread attributes. */ -- free (data.attr); -+ else if (data.raw[NOTIFY_COOKIE_LEN - 1] == NOTIFY_REMOVED && data.attr != NULL) -+ { -+ /* The only state we keep is the copy of the thread attributes. */ -+ pthread_attr_destroy (data.attr); -+ free (data.attr); -+ } - } - return NULL; - } -@@ -255,8 +258,14 @@ mq_notify (mqd_t mqdes, const struct sigevent *notification) - if (data.attr == NULL) - return -1; - -- memcpy (data.attr, notification->sigev_notify_attributes, -- sizeof (pthread_attr_t)); -+ int ret = __pthread_attr_copy (data.attr, -+ notification->sigev_notify_attributes); -+ if (ret != 0) -+ { -+ free (data.attr); -+ __set_errno (ret); -+ return -1; -+ } - } - - /* Construct the new request. */ -@@ -269,8 +278,11 @@ mq_notify (mqd_t mqdes, const struct sigevent *notification) - int retval = INLINE_SYSCALL (mq_notify, 2, mqdes, &se); - - /* If it failed, free the allocated memory. 
*/ -- if (__glibc_unlikely (retval != 0)) -- free (data.attr); -+ if (retval != 0 && data.attr != NULL) -+ { -+ pthread_attr_destroy (data.attr); -+ free (data.attr); -+ } - - return retval; - } -diff --git a/sysdeps/unix/sysv/linux/msgctl.c b/sysdeps/unix/sysv/linux/msgctl.c -index 0776472d5e..a1f24ab242 100644 ---- a/sysdeps/unix/sysv/linux/msgctl.c -+++ b/sysdeps/unix/sysv/linux/msgctl.c -@@ -90,8 +90,15 @@ __msgctl64 (int msqid, int cmd, struct __msqid64_ds *buf) - struct kernel_msqid64_ds ksemid, *arg = NULL; - if (buf != NULL) - { -- msqid64_to_kmsqid64 (buf, &ksemid); -- arg = &ksemid; -+ /* This is a Linux extension where kernel returns a 'struct msginfo' -+ instead. */ -+ if (cmd == IPC_INFO || cmd == MSG_INFO) -+ arg = (struct kernel_msqid64_ds *) buf; -+ else -+ { -+ msqid64_to_kmsqid64 (buf, &ksemid); -+ arg = &ksemid; -+ } - } - # ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T - if (cmd == IPC_SET) -@@ -169,8 +176,15 @@ __msgctl (int msqid, int cmd, struct msqid_ds *buf) - struct __msqid64_ds msqid64, *buf64 = NULL; - if (buf != NULL) - { -- msqid_to_msqid64 (&msqid64, buf); -- buf64 = &msqid64; -+ /* This is a Linux extension where kernel returns a 'struct msginfo' -+ instead. */ -+ if (cmd == IPC_INFO || cmd == MSG_INFO) -+ buf64 = (struct __msqid64_ds *) buf; -+ else -+ { -+ msqid_to_msqid64 (&msqid64, buf); -+ buf64 = &msqid64; -+ } - } - - int ret = __msgctl64 (msqid, cmd, buf64); -diff --git a/sysdeps/unix/sysv/linux/semctl.c b/sysdeps/unix/sysv/linux/semctl.c -index f131a26fc7..1cdabde8f2 100644 ---- a/sysdeps/unix/sysv/linux/semctl.c -+++ b/sysdeps/unix/sysv/linux/semctl.c -@@ -102,6 +102,7 @@ semun64_to_ksemun64 (int cmd, union semun64 semun64, - r.array = semun64.array; - break; - case SEM_STAT: -+ case SEM_STAT_ANY: - case IPC_STAT: - case IPC_SET: - r.buf = buf; -@@ -150,6 +151,7 @@ __semctl64 (int semid, int semnum, int cmd, ...) 
- case IPC_STAT: /* arg.buf */ - case IPC_SET: - case SEM_STAT: -+ case SEM_STAT_ANY: - case IPC_INFO: /* arg.__buf */ - case SEM_INFO: - va_start (ap, cmd); -@@ -238,6 +240,7 @@ semun_to_semun64 (int cmd, union semun semun, struct __semid64_ds *semid64) - r.array = semun.array; - break; - case SEM_STAT: -+ case SEM_STAT_ANY: - case IPC_STAT: - case IPC_SET: - r.buf = semid64; -@@ -267,6 +270,7 @@ __semctl (int semid, int semnum, int cmd, ...) - case IPC_STAT: /* arg.buf */ - case IPC_SET: - case SEM_STAT: -+ case SEM_STAT_ANY: - case IPC_INFO: /* arg.__buf */ - case SEM_INFO: - va_start (ap, cmd); -@@ -321,6 +325,7 @@ __semctl_mode16 (int semid, int semnum, int cmd, ...) - case IPC_STAT: /* arg.buf */ - case IPC_SET: - case SEM_STAT: -+ case SEM_STAT_ANY: - case IPC_INFO: /* arg.__buf */ - case SEM_INFO: - va_start (ap, cmd); -@@ -354,6 +359,7 @@ __old_semctl (int semid, int semnum, int cmd, ...) - case IPC_STAT: /* arg.buf */ - case IPC_SET: - case SEM_STAT: -+ case SEM_STAT_ANY: - case IPC_INFO: /* arg.__buf */ - case SEM_INFO: - va_start (ap, cmd); -diff --git a/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies b/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies -new file mode 100644 -index 0000000000..7eeaf15a5a ---- /dev/null -+++ b/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies -@@ -0,0 +1 @@ -+unix/sysv/linux/sh/sh4/fpu -diff --git a/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies b/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies -new file mode 100644 -index 0000000000..7eeaf15a5a ---- /dev/null -+++ b/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies -@@ -0,0 +1 @@ -+unix/sysv/linux/sh/sh4/fpu -diff --git a/sysdeps/unix/sysv/linux/shmctl.c b/sysdeps/unix/sysv/linux/shmctl.c -index 76d88441f1..1d19a798b1 100644 ---- a/sysdeps/unix/sysv/linux/shmctl.c -+++ b/sysdeps/unix/sysv/linux/shmctl.c -@@ -90,8 +90,15 @@ __shmctl64 (int shmid, int cmd, struct __shmid64_ds *buf) - struct kernel_shmid64_ds kshmid, *arg = NULL; - if (buf != NULL) - { -- shmid64_to_kshmid64 (buf, &kshmid); 
-- arg = &kshmid; -+ /* This is a Linux extension where kernel expects either a -+ 'struct shminfo' (IPC_INFO) or 'struct shm_info' (SHM_INFO). */ -+ if (cmd == IPC_INFO || cmd == SHM_INFO) -+ arg = (struct kernel_shmid64_ds *) buf; -+ else -+ { -+ shmid64_to_kshmid64 (buf, &kshmid); -+ arg = &kshmid; -+ } - } - # ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T - if (cmd == IPC_SET) -@@ -107,7 +114,6 @@ __shmctl64 (int shmid, int cmd, struct __shmid64_ds *buf) - - switch (cmd) - { -- case IPC_INFO: - case IPC_STAT: - case SHM_STAT: - case SHM_STAT_ANY: -@@ -168,8 +174,15 @@ __shmctl (int shmid, int cmd, struct shmid_ds *buf) - struct __shmid64_ds shmid64, *buf64 = NULL; - if (buf != NULL) - { -- shmid_to_shmid64 (&shmid64, buf); -- buf64 = &shmid64; -+ /* This is a Linux extension where kernel expects either a -+ 'struct shminfo' (IPC_INFO) or 'struct shm_info' (SHM_INFO). */ -+ if (cmd == IPC_INFO || cmd == SHM_INFO) -+ buf64 = (struct __shmid64_ds *) buf; -+ else -+ { -+ shmid_to_shmid64 (&shmid64, buf); -+ buf64 = &shmid64; -+ } - } - - int ret = __shmctl64 (shmid, cmd, buf64); -@@ -178,7 +191,6 @@ __shmctl (int shmid, int cmd, struct shmid_ds *buf) - - switch (cmd) - { -- case IPC_INFO: - case IPC_STAT: - case SHM_STAT: - case SHM_STAT_ANY: -diff --git a/sysdeps/unix/sysv/linux/tst-sysvmsg-linux.c b/sysdeps/unix/sysv/linux/tst-sysvmsg-linux.c -new file mode 100644 -index 0000000000..630f4f792c ---- /dev/null -+++ b/sysdeps/unix/sysv/linux/tst-sysvmsg-linux.c -@@ -0,0 +1,177 @@ -+/* Basic tests for Linux SYSV message queue extensions. -+ Copyright (C) 2020 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. 
-+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ <https://www.gnu.org/licenses/>. */ -+ -+#include <sys/ipc.h> -+#include <sys/msg.h> -+#include <errno.h> -+#include <stdlib.h> -+#include <stdbool.h> -+#include <stdio.h> -+ -+#include <support/check.h> -+#include <support/temp_file.h> -+ -+#define MSGQ_MODE 0644 -+ -+/* These are for the temporary file we generate. */ -+static char *name; -+static int msqid; -+ -+static void -+remove_msq (void) -+{ -+ /* Enforce message queue removal in case of early test failure. -+ Ignore error since the msg may already have being removed. */ -+ msgctl (msqid, IPC_RMID, NULL); -+} -+ -+static void -+do_prepare (int argc, char *argv[]) -+{ -+ TEST_VERIFY_EXIT (create_temp_file ("tst-sysvmsg.", &name) != -1); -+} -+ -+#define PREPARE do_prepare -+ -+struct test_msginfo -+{ -+ int msgmax; -+ int msgmnb; -+ int msgmni; -+}; -+ -+/* It tries to obtain some system-wide SysV messsage queue information from -+ /proc to check against IPC_INFO/MSG_INFO. The /proc only returns the -+ tunables value of MSGMAX, MSGMNB, and MSGMNI. -+ -+ The kernel also returns constant value for MSGSSZ, MSGSEG and also MSGMAP, -+ MSGPOOL, and MSGTQL (for IPC_INFO). The issue to check them is they might -+ change over kernel releases. 
*/ -+ -+static int -+read_proc_file (const char *file) -+{ -+ FILE *f = fopen (file, "r"); -+ if (f == NULL) -+ FAIL_UNSUPPORTED ("/proc is not mounted or %s is not available", file); -+ -+ int v; -+ int r = fscanf (f, "%d", & v); -+ TEST_VERIFY_EXIT (r == 1); -+ -+ fclose (f); -+ return v; -+} -+ -+ -+/* Check if the message queue with IDX (index into the kernel's internal -+ array) matches the one with KEY. The CMD is either MSG_STAT or -+ MSG_STAT_ANY. */ -+ -+static bool -+check_msginfo (int idx, key_t key, int cmd) -+{ -+ struct msqid_ds msginfo; -+ int mid = msgctl (idx, cmd, &msginfo); -+ /* Ignore unused array slot returned by the kernel or information from -+ unknown message queue. */ -+ if ((mid == -1 && errno == EINVAL) || mid != msqid) -+ return false; -+ -+ if (mid == -1) -+ FAIL_EXIT1 ("msgctl with %s failed: %m", -+ cmd == MSG_STAT ? "MSG_STAT" : "MSG_STAT_ANY"); -+ -+ TEST_COMPARE (msginfo.msg_perm.__key, key); -+ TEST_COMPARE (msginfo.msg_perm.mode, MSGQ_MODE); -+ TEST_COMPARE (msginfo.msg_qnum, 0); -+ -+ return true; -+} -+ -+static int -+do_test (void) -+{ -+ atexit (remove_msq); -+ -+ key_t key = ftok (name, 'G'); -+ if (key == -1) -+ FAIL_EXIT1 ("ftok failed: %m"); -+ -+ msqid = msgget (key, MSGQ_MODE | IPC_CREAT); -+ if (msqid == -1) -+ FAIL_EXIT1 ("msgget failed: %m"); -+ -+ struct test_msginfo tipcinfo; -+ tipcinfo.msgmax = read_proc_file ("/proc/sys/kernel/msgmax"); -+ tipcinfo.msgmnb = read_proc_file ("/proc/sys/kernel/msgmnb"); -+ tipcinfo.msgmni = read_proc_file ("/proc/sys/kernel/msgmni"); -+ -+ int msqidx; -+ -+ { -+ struct msginfo ipcinfo; -+ msqidx = msgctl (msqid, IPC_INFO, (struct msqid_ds *) &ipcinfo); -+ if (msqidx == -1) -+ FAIL_EXIT1 ("msgctl with IPC_INFO failed: %m"); -+ -+ TEST_COMPARE (ipcinfo.msgmax, tipcinfo.msgmax); -+ TEST_COMPARE (ipcinfo.msgmnb, tipcinfo.msgmnb); -+ TEST_COMPARE (ipcinfo.msgmni, tipcinfo.msgmni); -+ } -+ -+ /* Same as before but with MSG_INFO. 
*/ -+ { -+ struct msginfo ipcinfo; -+ msqidx = msgctl (msqid, MSG_INFO, (struct msqid_ds *) &ipcinfo); -+ if (msqidx == -1) -+ FAIL_EXIT1 ("msgctl with IPC_INFO failed: %m"); -+ -+ TEST_COMPARE (ipcinfo.msgmax, tipcinfo.msgmax); -+ TEST_COMPARE (ipcinfo.msgmnb, tipcinfo.msgmnb); -+ TEST_COMPARE (ipcinfo.msgmni, tipcinfo.msgmni); -+ } -+ -+ /* We check if the created message queue shows in global list. */ -+ bool found = false; -+ for (int i = 0; i <= msqidx; i++) -+ { -+ /* We can't tell apart if MSG_STAT_ANY is not supported (kernel older -+ than 4.17) or if the index used is invalid. So it just check if the -+ value returned from a valid call matches the created message -+ queue. */ -+ check_msginfo (i, key, MSG_STAT_ANY); -+ -+ if (check_msginfo (i, key, MSG_STAT)) -+ { -+ found = true; -+ break; -+ } -+ } -+ -+ if (!found) -+ FAIL_EXIT1 ("msgctl with MSG_STAT/MSG_STAT_ANY could not find the " -+ "created message queue"); -+ -+ if (msgctl (msqid, IPC_RMID, NULL) == -1) -+ FAIL_EXIT1 ("msgctl failed"); -+ -+ return 0; -+} -+ -+#include <support/test-driver.c> -diff --git a/sysdeps/unix/sysv/linux/tst-sysvsem-linux.c b/sysdeps/unix/sysv/linux/tst-sysvsem-linux.c -new file mode 100644 -index 0000000000..45f19e2d37 ---- /dev/null -+++ b/sysdeps/unix/sysv/linux/tst-sysvsem-linux.c -@@ -0,0 +1,184 @@ -+/* Basic tests for Linux SYSV semaphore extensions. -+ Copyright (C) 2020 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. 
-+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ <https://www.gnu.org/licenses/>. */ -+ -+#include <sys/ipc.h> -+#include <sys/sem.h> -+#include <errno.h> -+#include <stdlib.h> -+#include <stdbool.h> -+#include <stdio.h> -+ -+#include <support/check.h> -+#include <support/temp_file.h> -+ -+/* These are for the temporary file we generate. */ -+static char *name; -+static int semid; -+ -+static void -+remove_sem (void) -+{ -+ /* Enforce message queue removal in case of early test failure. -+ Ignore error since the sem may already have being removed. */ -+ semctl (semid, 0, IPC_RMID, 0); -+} -+ -+static void -+do_prepare (int argc, char *argv[]) -+{ -+ TEST_VERIFY_EXIT (create_temp_file ("tst-sysvsem.", &name) != -1); -+} -+ -+#define PREPARE do_prepare -+ -+#define SEM_MODE 0644 -+ -+union semun -+{ -+ int val; -+ struct semid_ds *buf; -+ unsigned short *array; -+ struct seminfo *__buf; -+}; -+ -+struct test_seminfo -+{ -+ int semmsl; -+ int semmns; -+ int semopm; -+ int semmni; -+}; -+ -+/* It tries to obtain some system-wide SysV semaphore information from /proc -+ to check against IPC_INFO/SEM_INFO. The /proc only returns the tunables -+ value of SEMMSL, SEMMNS, SEMOPM, and SEMMNI. -+ -+ The kernel also returns constant value for SEMVMX, SEMMNU, SEMMAP, SEMUME, -+ and also SEMUSZ and SEMAEM (for IPC_INFO). The issue to check them is they -+ might change over kernel releases. */ -+ -+static void -+read_sem_stat (struct test_seminfo *tseminfo) -+{ -+ FILE *f = fopen ("/proc/sys/kernel/sem", "r"); -+ if (f == NULL) -+ FAIL_UNSUPPORTED ("/proc is not mounted or /proc/sys/kernel/sem is not " -+ "available"); -+ -+ int r = fscanf (f, "%d %d %d %d", -+ &tseminfo->semmsl, &tseminfo->semmns, &tseminfo->semopm, -+ &tseminfo->semmni); -+ TEST_VERIFY_EXIT (r == 4); -+ -+ fclose (f); -+} -+ -+ -+/* Check if the semaphore with IDX (index into the kernel's internal array) -+ matches the one with KEY. 
The CMD is either SEM_STAT or SEM_STAT_ANY. */ -+ -+static bool -+check_seminfo (int idx, key_t key, int cmd) -+{ -+ struct semid_ds seminfo; -+ int sid = semctl (idx, 0, cmd, (union semun) { .buf = &seminfo }); -+ /* Ignore unused array slot returned by the kernel or information from -+ unknown semaphores. */ -+ if ((sid == -1 && errno == EINVAL) || sid != semid) -+ return false; -+ -+ if (sid == -1) -+ FAIL_EXIT1 ("semctl with SEM_STAT failed (errno=%d)", errno); -+ -+ TEST_COMPARE (seminfo.sem_perm.__key, key); -+ TEST_COMPARE (seminfo.sem_perm.mode, SEM_MODE); -+ TEST_COMPARE (seminfo.sem_nsems, 1); -+ -+ return true; -+} -+ -+static int -+do_test (void) -+{ -+ atexit (remove_sem); -+ -+ key_t key = ftok (name, 'G'); -+ if (key == -1) -+ FAIL_EXIT1 ("ftok failed: %m"); -+ -+ semid = semget (key, 1, IPC_CREAT | IPC_EXCL | SEM_MODE); -+ if (semid == -1) -+ FAIL_EXIT1 ("semget failed: %m"); -+ -+ struct test_seminfo tipcinfo; -+ read_sem_stat (&tipcinfo); -+ -+ int semidx; -+ -+ { -+ struct seminfo ipcinfo; -+ semidx = semctl (semid, 0, IPC_INFO, (union semun) { .__buf = &ipcinfo }); -+ if (semidx == -1) -+ FAIL_EXIT1 ("semctl with IPC_INFO failed: %m"); -+ -+ TEST_COMPARE (ipcinfo.semmsl, tipcinfo.semmsl); -+ TEST_COMPARE (ipcinfo.semmns, tipcinfo.semmns); -+ TEST_COMPARE (ipcinfo.semopm, tipcinfo.semopm); -+ TEST_COMPARE (ipcinfo.semmni, tipcinfo.semmni); -+ } -+ -+ /* Same as before but with SEM_INFO. */ -+ { -+ struct seminfo ipcinfo; -+ semidx = semctl (semid, 0, SEM_INFO, (union semun) { .__buf = &ipcinfo }); -+ if (semidx == -1) -+ FAIL_EXIT1 ("semctl with IPC_INFO failed: %m"); -+ -+ TEST_COMPARE (ipcinfo.semmsl, tipcinfo.semmsl); -+ TEST_COMPARE (ipcinfo.semmns, tipcinfo.semmns); -+ TEST_COMPARE (ipcinfo.semopm, tipcinfo.semopm); -+ TEST_COMPARE (ipcinfo.semmni, tipcinfo.semmni); -+ } -+ -+ /* We check if the created semaphore shows in the system-wide status. 
*/ -+ bool found = false; -+ for (int i = 0; i <= semidx; i++) -+ { -+ /* We can't tell apart if SEM_STAT_ANY is not supported (kernel older -+ than 4.17) or if the index used is invalid. So it just check if -+ value returned from a valid call matches the created semaphore. */ -+ check_seminfo (i, key, SEM_STAT_ANY); -+ -+ if (check_seminfo (i, key, SEM_STAT)) -+ { -+ found = true; -+ break; -+ } -+ } -+ -+ if (!found) -+ FAIL_EXIT1 ("semctl with SEM_STAT/SEM_STAT_ANY could not find the " -+ "created semaphore"); -+ -+ if (semctl (semid, 0, IPC_RMID, 0) == -1) -+ FAIL_EXIT1 ("semctl failed: %m"); -+ -+ return 0; -+} -+ -+#include <support/test-driver.c> -diff --git a/sysdeps/unix/sysv/linux/tst-sysvshm-linux.c b/sysdeps/unix/sysv/linux/tst-sysvshm-linux.c -new file mode 100644 -index 0000000000..bb154592a6 ---- /dev/null -+++ b/sysdeps/unix/sysv/linux/tst-sysvshm-linux.c -@@ -0,0 +1,188 @@ -+/* Basic tests for Linux SYSV shared memory extensions. -+ Copyright (C) 2020 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ <https://www.gnu.org/licenses/>. 
*/ -+ -+#include <sys/ipc.h> -+#include <sys/shm.h> -+#include <errno.h> -+#include <stdlib.h> -+#include <stdbool.h> -+#include <stdio.h> -+#include <unistd.h> -+#include <inttypes.h> -+#include <limits.h> -+ -+#include <support/check.h> -+#include <support/temp_file.h> -+ -+#define SHM_MODE 0644 -+ -+/* These are for the temporary file we generate. */ -+static char *name; -+static int shmid; -+static long int pgsz; -+ -+static void -+remove_shm (void) -+{ -+ /* Enforce message queue removal in case of early test failure. -+ Ignore error since the shm may already have being removed. */ -+ shmctl (shmid, IPC_RMID, NULL); -+} -+ -+static void -+do_prepare (int argc, char *argv[]) -+{ -+ TEST_VERIFY_EXIT (create_temp_file ("tst-sysvshm.", &name) != -1); -+} -+ -+#define PREPARE do_prepare -+ -+struct test_shminfo -+{ -+ __syscall_ulong_t shmall; -+ __syscall_ulong_t shmmax; -+ __syscall_ulong_t shmmni; -+}; -+ -+/* It tries to obtain some system-wide SysV shared memory information from -+ /proc to check against IPC_INFO/SHM_INFO. The /proc only returns the -+ tunables value of SHMALL, SHMMAX, and SHMMNI. */ -+ -+static uint64_t -+read_proc_file (const char *file) -+{ -+ FILE *f = fopen (file, "r"); -+ if (f == NULL) -+ FAIL_UNSUPPORTED ("/proc is not mounted or %s is not available", file); -+ -+ /* Handle 32-bit binaries running on 64-bit kernels. */ -+ uint64_t v; -+ int r = fscanf (f, "%" SCNu64, &v); -+ TEST_VERIFY_EXIT (r == 1); -+ -+ fclose (f); -+ return v; -+} -+ -+ -+/* Check if the message queue with IDX (index into the kernel's internal -+ array) matches the one with KEY. The CMD is either SHM_STAT or -+ SHM_STAT_ANY. */ -+ -+static bool -+check_shminfo (int idx, key_t key, int cmd) -+{ -+ struct shmid_ds shminfo; -+ int sid = shmctl (idx, cmd, &shminfo); -+ /* Ignore unused array slot returned by the kernel or information from -+ unknown message queue. 
*/ -+ if ((sid == -1 && errno == EINVAL) || sid != shmid) -+ return false; -+ -+ if (sid == -1) -+ FAIL_EXIT1 ("shmctl with %s failed: %m", -+ cmd == SHM_STAT ? "SHM_STAT" : "SHM_STAT_ANY"); -+ -+ TEST_COMPARE (shminfo.shm_perm.__key, key); -+ TEST_COMPARE (shminfo.shm_perm.mode, SHM_MODE); -+ TEST_COMPARE (shminfo.shm_segsz, pgsz); -+ -+ return true; -+} -+ -+static int -+do_test (void) -+{ -+ atexit (remove_shm); -+ -+ pgsz = sysconf (_SC_PAGESIZE); -+ if (pgsz == -1) -+ FAIL_EXIT1 ("sysconf (_SC_PAGESIZE) failed: %m"); -+ -+ key_t key = ftok (name, 'G'); -+ if (key == -1) -+ FAIL_EXIT1 ("ftok failed: %m"); -+ -+ shmid = shmget (key, pgsz, IPC_CREAT | IPC_EXCL | SHM_MODE); -+ if (shmid == -1) -+ FAIL_EXIT1 ("shmget failed: %m"); -+ -+ /* It does not check shmmax because kernel clamp its value to INT_MAX for: -+ -+ 1. Compat symbols with IPC_64, i.e, 32-bit binaries running on 64-bit -+ kernels. -+ -+ 2. Default symbol without IPC_64 (defined as IPC_OLD within Linux) and -+ glibc always use IPC_64 for 32-bit ABIs (to support 64-bit time_t). -+ It means that 32-bit binaries running on 32-bit kernels will not see -+ shmmax being clamped. -+ -+ And finding out whether the compat symbol is used would require checking -+ the underlying kernel against the current ABI. The shmall and shmmni -+ already provided enough coverage. */ -+ -+ struct test_shminfo tipcinfo; -+ tipcinfo.shmall = read_proc_file ("/proc/sys/kernel/shmall"); -+ tipcinfo.shmmni = read_proc_file ("/proc/sys/kernel/shmmni"); -+ -+ int shmidx; -+ -+ /* Note: SHM_INFO does not return a shminfo, but rather a 'struct shm_info'. -+ It is tricky to verify its values since the syscall returns system wide -+ resources consumed by shared memory. The shmctl implementation handles -+ SHM_INFO as IPC_INFO, so the IPC_INFO test should validate SHM_INFO as -+ well. 
*/ -+ -+ { -+ struct shminfo ipcinfo; -+ shmidx = shmctl (shmid, IPC_INFO, (struct shmid_ds *) &ipcinfo); -+ if (shmidx == -1) -+ FAIL_EXIT1 ("shmctl with IPC_INFO failed: %m"); -+ -+ TEST_COMPARE (ipcinfo.shmall, tipcinfo.shmall); -+ TEST_COMPARE (ipcinfo.shmmni, tipcinfo.shmmni); -+ } -+ -+ /* We check if the created shared memory shows in the global list. */ -+ bool found = false; -+ for (int i = 0; i <= shmidx; i++) -+ { -+ /* We can't tell apart if SHM_STAT_ANY is not supported (kernel older -+ than 4.17) or if the index used is invalid. So it just check if -+ value returned from a valid call matches the created message -+ queue. */ -+ check_shminfo (i, key, SHM_STAT_ANY); -+ -+ if (check_shminfo (i, key, SHM_STAT)) -+ { -+ found = true; -+ break; -+ } -+ } -+ -+ if (!found) -+ FAIL_EXIT1 ("shmctl with SHM_STAT/SHM_STAT_ANY could not find the " -+ "created shared memory"); -+ -+ if (shmctl (shmid, IPC_RMID, NULL) == -1) -+ FAIL_EXIT1 ("shmctl failed"); -+ -+ return 0; -+} -+ -+#include <support/test-driver.c> -diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile -index a6736aef25..9736a13e7b 100644 ---- a/sysdeps/x86/Makefile -+++ b/sysdeps/x86/Makefile -@@ -12,6 +12,12 @@ endif - ifeq ($(subdir),setjmp) - gen-as-const-headers += jmp_buf-ssp.sym - sysdep_routines += __longjmp_cancel -+ifneq ($(enable-cet),no) -+ifneq ($(have-tunables),no) -+tests += tst-setjmp-cet -+tst-setjmp-cet-ENV = GLIBC_TUNABLES=glibc.cpu.x86_ibt=on:glibc.cpu.x86_shstk=on -+endif -+endif - endif - - ifeq ($(subdir),string) -diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c -index 217c21c34f..3fb4a028d8 100644 ---- a/sysdeps/x86/cacheinfo.c -+++ b/sysdeps/x86/cacheinfo.c -@@ -808,7 +808,7 @@ init_cacheinfo (void) - threads = 1 << ((ecx >> 12) & 0x0f); - } - -- if (threads == 0) -+ if (threads == 0 || cpu_features->basic.family >= 0x17) - { - /* If APIC ID width is not available, use logical - processor count. 
*/ -@@ -823,8 +823,22 @@ init_cacheinfo (void) - if (threads > 0) - shared /= threads; - -- /* Account for exclusive L2 and L3 caches. */ -- shared += core; -+ /* Get shared cache per ccx for Zen architectures. */ -+ if (cpu_features->basic.family >= 0x17) -+ { -+ unsigned int eax; -+ -+ /* Get number of threads share the L3 cache in CCX. */ -+ __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx); -+ -+ unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1; -+ shared *= threads_per_ccx; -+ } -+ else -+ { -+ /* Account for exclusive L2 and L3 caches. */ -+ shared += core; -+ } - } - } - -@@ -854,14 +868,20 @@ init_cacheinfo (void) - __x86_shared_cache_size = shared; - } - -- /* The large memcpy micro benchmark in glibc shows that 6 times of -- shared cache size is the approximate value above which non-temporal -- store becomes faster on a 8-core processor. This is the 3/4 of the -- total shared cache size. */ -+ /* The default setting for the non_temporal threshold is 3/4 of one -+ thread's share of the chip's cache. For most Intel and AMD processors -+ with an initial release date between 2017 and 2020, a thread's typical -+ share of the cache is from 500 KBytes to 2 MBytes. Using the 3/4 -+ threshold leaves 125 KBytes to 500 KBytes of the thread's data -+ in cache after a maximum temporal copy, which will maintain -+ in cache a reasonable portion of the thread's stack and other -+ active data. If the threshold is set higher than one thread's -+ share of the cache, it has a substantial risk of negatively -+ impacting the performance of other threads running on the chip. */ - __x86_shared_non_temporal_threshold - = (cpu_features->non_temporal_threshold != 0 - ? cpu_features->non_temporal_threshold -- : __x86_shared_cache_size * threads * 3 / 4); -+ : __x86_shared_cache_size * 3 / 4); - - /* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8. 
*/ - unsigned int minimum_rep_movsb_threshold; -diff --git a/sysdeps/x86/dl-cet.c b/sysdeps/x86/dl-cet.c -index 03572f7af6..3cc54a8d53 100644 ---- a/sysdeps/x86/dl-cet.c -+++ b/sysdeps/x86/dl-cet.c -@@ -47,7 +47,10 @@ dl_cet_check (struct link_map *m, const char *program) - /* No legacy object check if both IBT and SHSTK are always on. */ - if (enable_ibt_type == cet_always_on - && enable_shstk_type == cet_always_on) -- return; -+ { -+ THREAD_SETMEM (THREAD_SELF, header.feature_1, GL(dl_x86_feature_1)); -+ return; -+ } - - /* Check if IBT is enabled by kernel. */ - bool ibt_enabled -diff --git a/sysdeps/x86/dl-prop.h b/sysdeps/x86/dl-prop.h -index 89911e19e2..4eb3b85a7b 100644 ---- a/sysdeps/x86/dl-prop.h -+++ b/sysdeps/x86/dl-prop.h -@@ -145,15 +145,15 @@ _dl_process_cet_property_note (struct link_map *l, - } - - static inline void __attribute__ ((unused)) --_dl_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph) -+_dl_process_pt_note (struct link_map *l, int fd, const ElfW(Phdr) *ph) - { - const ElfW(Nhdr) *note = (const void *) (ph->p_vaddr + l->l_addr); - _dl_process_cet_property_note (l, note, ph->p_memsz, ph->p_align); - } - - static inline int __attribute__ ((always_inline)) --_dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz, -- void *data) -+_dl_process_gnu_property (struct link_map *l, int fd, uint32_t type, -+ uint32_t datasz, void *data) - { - return 0; - } -diff --git a/sysdeps/x86/tst-setjmp-cet.c b/sysdeps/x86/tst-setjmp-cet.c -new file mode 100644 -index 0000000000..42c795d2a8 ---- /dev/null -+++ b/sysdeps/x86/tst-setjmp-cet.c -@@ -0,0 +1 @@ -+#include <setjmp/tst-setjmp.c> -diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure -old mode 100644 -new mode 100755 -index 84f82c2406..fc1840e23f ---- a/sysdeps/x86_64/configure -+++ b/sysdeps/x86_64/configure -@@ -107,39 +107,6 @@ if test x"$build_mathvec" = xnotset; then - build_mathvec=yes - fi - --if test "$static_pie" = yes; then -- { $as_echo 
"$as_me:${as_lineno-$LINENO}: checking for linker static PIE support" >&5 --$as_echo_n "checking for linker static PIE support... " >&6; } --if ${libc_cv_ld_static_pie+:} false; then : -- $as_echo_n "(cached) " >&6 --else -- cat > conftest.s <<\EOF -- .text -- .global _start -- .weak foo --_start: -- leaq foo(%rip), %rax --EOF -- libc_cv_pie_option="-Wl,-pie" -- if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&5' -- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 -- (eval $ac_try) 2>&5 -- ac_status=$? -- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 -- test $ac_status = 0; }; }; then -- libc_cv_ld_static_pie=yes -- else -- libc_cv_ld_static_pie=no -- fi --rm -f conftest* --fi --{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ld_static_pie" >&5 --$as_echo "$libc_cv_ld_static_pie" >&6; } -- if test "$libc_cv_ld_static_pie" != yes; then -- as_fn_error $? "linker support for static PIE needed" "$LINENO" 5 -- fi --fi -- - $as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h - - -diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac -index cdaba0c075..611a7d9ba3 100644 ---- a/sysdeps/x86_64/configure.ac -+++ b/sysdeps/x86_64/configure.ac -@@ -53,31 +53,6 @@ if test x"$build_mathvec" = xnotset; then - build_mathvec=yes - fi - --dnl Check if linker supports static PIE with the fix for --dnl --dnl https://sourceware.org/bugzilla/show_bug.cgi?id=21782 --dnl --if test "$static_pie" = yes; then -- AC_CACHE_CHECK(for linker static PIE support, libc_cv_ld_static_pie, [dnl --cat > conftest.s <<\EOF -- .text -- .global _start -- .weak foo --_start: -- leaq foo(%rip), %rax --EOF -- libc_cv_pie_option="-Wl,-pie" -- if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&AS_MESSAGE_LOG_FD); then -- libc_cv_ld_static_pie=yes -- else -- libc_cv_ld_static_pie=no -- fi 
--rm -f conftest*]) -- if test "$libc_cv_ld_static_pie" != yes; then -- AC_MSG_ERROR([linker support for static PIE needed]) -- fi --fi -- - dnl It is always possible to access static and hidden symbols in an - dnl position independent way. - AC_DEFINE(PI_STATIC_AND_HIDDEN) -diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h -index ca73d8fef9..363a749cb2 100644 ---- a/sysdeps/x86_64/dl-machine.h -+++ b/sysdeps/x86_64/dl-machine.h -@@ -315,16 +315,22 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, - { - # ifndef RTLD_BOOTSTRAP - if (sym_map != map -- && sym_map->l_type != lt_executable - && !sym_map->l_relocated) - { - const char *strtab - = (const char *) D_PTR (map, l_info[DT_STRTAB]); -- _dl_error_printf ("\ -+ if (sym_map->l_type == lt_executable) -+ _dl_fatal_printf ("\ -+%s: IFUNC symbol '%s' referenced in '%s' is defined in the executable \ -+and creates an unsatisfiable circular dependency.\n", -+ RTLD_PROGNAME, strtab + refsym->st_name, -+ map->l_name); -+ else -+ _dl_error_printf ("\ - %s: Relink `%s' with `%s' for IFUNC symbol `%s'\n", -- RTLD_PROGNAME, map->l_name, -- sym_map->l_name, -- strtab + refsym->st_name); -+ RTLD_PROGNAME, map->l_name, -+ sym_map->l_name, -+ strtab + refsym->st_name); - } - # endif - value = ((ElfW(Addr) (*) (void)) value) (); -diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h b/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h -index 7659758972..e5fd5ac9cb 100644 ---- a/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h -+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h -@@ -32,7 +32,7 @@ IFUNC_SELECTOR (void) - && CPU_FEATURE_USABLE_P (cpu_features, AVX2)) - return OPTIMIZE (fma); - -- if (CPU_FEATURE_USABLE_P (cpu_features, FMA)) -+ if (CPU_FEATURE_USABLE_P (cpu_features, FMA4)) - return OPTIMIZE (fma4); - - return OPTIMIZE (sse2); -diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S -index bd5dc1a3f3..092f364bb6 100644 ---- 
a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S -+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S -@@ -56,6 +56,13 @@ - # endif - #endif - -+/* Avoid short distance rep movsb only with non-SSE vector. */ -+#ifndef AVOID_SHORT_DISTANCE_REP_MOVSB -+# define AVOID_SHORT_DISTANCE_REP_MOVSB (VEC_SIZE > 16) -+#else -+# define AVOID_SHORT_DISTANCE_REP_MOVSB 0 -+#endif -+ - #ifndef PREFETCH - # define PREFETCH(addr) prefetcht0 addr - #endif -@@ -243,7 +250,21 @@ L(movsb): - cmpq %r9, %rdi - /* Avoid slow backward REP MOVSB. */ - jb L(more_8x_vec_backward) -+# if AVOID_SHORT_DISTANCE_REP_MOVSB -+ movq %rdi, %rcx -+ subq %rsi, %rcx -+ jmp 2f -+# endif - 1: -+# if AVOID_SHORT_DISTANCE_REP_MOVSB -+ movq %rsi, %rcx -+ subq %rdi, %rcx -+2: -+/* Avoid "rep movsb" if RCX, the distance between source and destination, -+ is N*4GB + [1..63] with N >= 0. */ -+ cmpl $63, %ecx -+ jbe L(more_2x_vec) /* Avoid "rep movsb" if ECX <= 63. */ -+# endif - mov %RDX_LP, %RCX_LP - rep movsb - L(nop): -diff --git a/sysvipc/test-sysvsem.c b/sysvipc/test-sysvsem.c -index 01dbff343a..b7284e0b48 100644 ---- a/sysvipc/test-sysvsem.c -+++ b/sysvipc/test-sysvsem.c -@@ -20,6 +20,7 @@ - #include <stdlib.h> - #include <errno.h> - #include <string.h> -+#include <stdbool.h> - #include <sys/types.h> - #include <sys/ipc.h> - #include <sys/sem.h> -diff --git a/version.h b/version.h -index 83cd196798..e6ca7a8857 100644 ---- a/version.h -+++ b/version.h -@@ -1,4 +1,4 @@ - /* This file just defines the current version number of libc. 
*/ - --#define RELEASE "release" -+#define RELEASE "stable" - #define VERSION "2.32" -diff -pruN glibc-2.32.orig/sysdeps/unix/sysv/linux/x86_64/64/configure glibc-2.32/sysdeps/unix/sysv/linux/x86_64/64/configure ---- glibc-2.32.orig/sysdeps/unix/sysv/linux/x86_64/64/configure 2021-09-18 21:02:32.741186019 +1000 -+++ glibc-2.32/sysdeps/unix/sysv/linux/x86_64/64/configure 2021-09-18 21:03:05.314302356 +1000 -@@ -4,10 +4,10 @@ - test -n "$libc_cv_slibdir" || - case "$prefix" in - /usr | /usr/) -- libc_cv_slibdir='/lib64' -- libc_cv_rtlddir='/lib64' -+ libc_cv_slibdir='/lib' -+ libc_cv_rtlddir='/lib' - if test "$libdir" = '${exec_prefix}/lib'; then -- libdir='${exec_prefix}/lib64'; -+ libdir='${exec_prefix}/lib'; - # Locale data can be shared between 32-bit and 64-bit libraries. - libc_cv_complocaledir='${exec_prefix}/lib/locale' - fi -diff -pruN glibc-2.32.orig/sysdeps/unix/sysv/linux/x86_64/ldconfig.h glibc-2.32/sysdeps/unix/sysv/linux/x86_64/ldconfig.h ---- glibc-2.32.orig/sysdeps/unix/sysv/linux/x86_64/ldconfig.h 2021-09-18 21:02:32.742186053 +1000 -+++ glibc-2.32/sysdeps/unix/sysv/linux/x86_64/ldconfig.h 2021-09-18 21:03:05.314302356 +1000 -@@ -18,9 +18,9 @@ - #include <sysdeps/generic/ldconfig.h> - - #define SYSDEP_KNOWN_INTERPRETER_NAMES \ -- { "/lib/ld-linux.so.2", FLAG_ELF_LIBC6 }, \ -+ { "/lib32/ld-linux.so.2", FLAG_ELF_LIBC6 }, \ - { "/libx32/ld-linux-x32.so.2", FLAG_ELF_LIBC6 }, \ -- { "/lib64/ld-linux-x86-64.so.2", FLAG_ELF_LIBC6 }, -+ { "/lib/ld-linux-x86-64.so.2", FLAG_ELF_LIBC6 }, - #define SYSDEP_KNOWN_LIBRARY_NAMES \ - { "libc.so.6", FLAG_ELF_LIBC6 }, \ - { "libm.so.6", FLAG_ELF_LIBC6 }, diff --git a/glibc/glibc-2.32-7.patch b/glibc/glibc-2.32-7.patch new file mode 100644 index 00000000..d7949e11 --- /dev/null +++ b/glibc/glibc-2.32-7.patch @@ -0,0 +1,17967 @@ +diff --git a/NEWS b/NEWS +index 485b8ddffa..d138a45519 100644 +--- a/NEWS ++++ b/NEWS +@@ -5,6 +5,30 @@ See the end for copying conditions. 
+ Please send GNU C library bug reports via <https://sourceware.org/bugzilla/> + using `glibc' in the "product" field. + ++The following bugs are resolved with this release: ++ ++ [20019] NULL pointer dereference in libc.so.6 IFUNC due to uninitialized GOT ++ [26224] iconv hangs when converting some invalid inputs from several IBM ++ character sets (CVE-2020-27618) ++ [26534] libm.so 2.32 SIGILL in pow() due to FMA4 instruction on non-FMA4 ++ system ++ [26555] string: strerrorname_np does not return the documented value ++ [26600] Transaction ID collisions cause slow DNS lookups in getaddrinfo ++ [26636] libc: 32-bit shmctl(IPC_INFO) crashes when shminfo struct is ++ at the end of a memory mapping ++ [26637] libc: semctl SEM_STAT_ANY fails to pass the buffer specified ++ by the caller to the kernel ++ [26639] libc: msgctl IPC_INFO and MSG_INFO return garbage ++ [26853] aarch64: Missing unwind information in statically linked startup code ++ [26932] libc: sh: Multiple floating point functions defined as stubs only ++ [27130] "rep movsb" performance issue ++ [27177] GLIBC_TUNABLES=glibc.cpu.x86_ibt=on:glibc.cpu.x86_shstk=on doesn't work ++ [27457] vzeroupper use in AVX2 multiarch string functions cause HTM aborts ++ [27974] Overflow bug in some implementation of wcsnlen, wmemchr, and wcsncat ++ [28524] Conversion from ISO-2022-JP-3 with iconv may emit spurious NULs ++ [28607] Masked signals are delivered on thread exit ++ [28755] overflow bug in wcsncmp_avx2 and wcsncmp_evex ++ + Version 2.32 + + Major new features: +@@ -185,6 +209,14 @@ Security related changes: + Dytrych of the Cisco Security Assessment and Penetration Team (See + TALOS-2020-1019). + ++ CVE-2020-27618: An infinite loop has been fixed in the iconv program when ++ invoked with input containing redundant shift sequences in the IBM1364, ++ IBM1371, IBM1388, IBM1390, or IBM1399 character sets. 
++ ++ CVE-2021-33574: The mq_notify function has a potential use-after-free ++ issue when using a notification type of SIGEV_THREAD and a thread ++ attribute with a non-default affinity mask. ++ + The following bugs are resolved with this release: + + [9809] localedata: ckb_IQ: new Kurdish Sorani locale +diff --git a/Rules b/Rules +index 8b771f6095..beab969fde 100644 +--- a/Rules ++++ b/Rules +@@ -155,6 +155,7 @@ xtests: tests $(xtests-special) + else + tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \ + $(tests-container:%=$(objpfx)%.out) \ ++ $(tests-mcheck:%=$(objpfx)%-mcheck.out) \ + $(tests-special) $(tests-printers-out) + xtests: tests $(xtests:%=$(objpfx)%.out) $(xtests-special) + endif +@@ -165,7 +166,7 @@ ifeq ($(run-built-tests),no) + tests-expected = + else + tests-expected = $(tests) $(tests-internal) $(tests-printers) \ +- $(tests-container) ++ $(tests-container) $(tests-mcheck:%=%-mcheck) + endif + tests: + $(..)scripts/merge-test-results.sh -s $(objpfx) $(subdir) \ +@@ -191,6 +192,7 @@ else + binaries-pie-tests = + binaries-pie-notests = + endif ++binaries-mcheck-tests = $(tests-mcheck:%=%-mcheck) + else + binaries-all-notests = + binaries-all-tests = $(tests) $(tests-internal) $(xtests) $(test-srcs) +@@ -200,6 +202,7 @@ binaries-static-tests = + binaries-static = + binaries-pie-tests = + binaries-pie-notests = ++binaries-mcheck-tests = + endif + + binaries-pie = $(binaries-pie-tests) $(binaries-pie-notests) +@@ -223,6 +226,14 @@ $(addprefix $(objpfx),$(binaries-shared-tests)): %: %.o \ + $(+link-tests) + endif + ++ifneq "$(strip $(binaries-mcheck-tests))" "" ++$(addprefix $(objpfx),$(binaries-mcheck-tests)): %-mcheck: %.o \ ++ $(link-extra-libs-tests) \ ++ $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \ ++ $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit) ++ $(+link-tests) ++endif ++ + ifneq "$(strip $(binaries-pie-tests))" "" + $(addprefix $(objpfx),$(binaries-pie-tests)): %: %.o \ + $(link-extra-libs-tests) \ 
+@@ -253,6 +264,12 @@ $(addprefix $(objpfx),$(binaries-static-tests)): %: %.o \ + $(+link-static-tests) + endif + ++# All mcheck tests will be run with MALLOC_CHECK_=3 ++define mcheck-ENVS ++$(1)-mcheck-ENV = MALLOC_CHECK_=3 ++endef ++$(foreach t,$(tests-mcheck),$(eval $(call mcheck-ENVS,$(t)))) ++ + ifneq "$(strip $(tests) $(tests-internal) $(xtests) $(test-srcs))" "" + # These are the implicit rules for making test outputs + # from the test programs and whatever input files are present. +diff --git a/debug/Makefile b/debug/Makefile +index 3a60d7af7a..0036edd187 100644 +--- a/debug/Makefile ++++ b/debug/Makefile +@@ -51,7 +51,7 @@ routines = backtrace backtracesyms backtracesymsfd noophooks \ + explicit_bzero_chk \ + stack_chk_fail fortify_fail \ + $(static-only-routines) +-static-only-routines := warning-nop stack_chk_fail_local ++static-only-routines := stack_chk_fail_local + + # Don't add stack_chk_fail_local.o to libc.a since __stack_chk_fail_local + # is an alias of __stack_chk_fail in stack_chk_fail.o. +diff --git a/debug/warning-nop.c b/debug/warning-nop.c +deleted file mode 100644 +index 4ab7e182b7..0000000000 +--- a/debug/warning-nop.c ++++ /dev/null +@@ -1,70 +0,0 @@ +-/* Dummy nop functions to elicit link-time warnings. +- Copyright (C) 2005-2020 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- In addition to the permissions in the GNU Lesser General Public +- License, the Free Software Foundation gives you unlimited +- permission to link the compiled version of this file with other +- programs, and to distribute those programs without any restriction +- coming from the use of this file. 
(The GNU Lesser General Public +- License restrictions do apply in other respects; for example, they +- cover modification of the file, and distribution when not linked +- into another program.) +- +- Note that people who make modified versions of this file are not +- obligated to grant this special exception for their modified +- versions; it is their choice whether to do so. The GNU Lesser +- General Public License gives permission to release a modified +- version without this exception; this exception also makes it +- possible to release a modified version which carries forward this +- exception. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- <https://www.gnu.org/licenses/>. */ +- +-#include <sys/cdefs.h> +- +-static void +-__attribute__ ((used)) +-nop (void) +-{ +-} +- +-/* Don't insert any other #include's before this #undef! */ +- +-#undef __warndecl +-#define __warndecl(name, msg) \ +- extern void name (void) __attribute__ ((alias ("nop"))) attribute_hidden; \ +- link_warning (name, msg) +- +-#undef __USE_FORTIFY_LEVEL +-#define __USE_FORTIFY_LEVEL 99 +- +-/* Following here we need an #include for each public header file +- that uses __warndecl. */ +- +-/* Define away to avoid warnings with compilers that do not have these +- builtins. 
*/ +-#define __builtin___memcpy_chk(dest, src, len, bos) NULL +-#define __builtin___memmove_chk(dest, src, len, bos) NULL +-#define __builtin___mempcpy_chk(dest, src, len, bos) NULL +-#define __builtin___memset_chk(dest, ch, len, bos) NULL +-#define __builtin___stpcpy_chk(dest, src, bos) NULL +-#define __builtin___strcat_chk(dest, src, bos) NULL +-#define __builtin___strcpy_chk(dest, src, bos) NULL +-#define __builtin___strncat_chk(dest, src, len, bos) NULL +-#define __builtin___strncpy_chk(dest, src, len, bos) NULL +-#define __builtin_object_size(bos, level) 0 +- +-#include <string.h> +diff --git a/elf/Makefile b/elf/Makefile +index 0b78721848..3ba7f4ecfc 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -1381,6 +1381,8 @@ CFLAGS-ifuncmain7pie.c += $(pie-ccflag) + CFLAGS-ifuncmain9pie.c += $(pie-ccflag) + CFLAGS-tst-ifunc-textrel.c += $(pic-ccflag) + ++LDFLAGS-ifuncmain6pie = -Wl,-z,lazy ++ + $(objpfx)ifuncmain1pie: $(objpfx)ifuncmod1.so + $(objpfx)ifuncmain1staticpie: $(objpfx)ifuncdep1pic.o + $(objpfx)ifuncmain1vispie: $(objpfx)ifuncmod1.so +@@ -1630,8 +1632,6 @@ $(objpfx)tst-nodelete-dlclose.out: $(objpfx)tst-nodelete-dlclose-dso.so \ + + tst-env-setuid-ENV = MALLOC_CHECK_=2 MALLOC_MMAP_THRESHOLD_=4096 \ + LD_HWCAP_MASK=0x1 +-tst-env-setuid-tunables-ENV = \ +- GLIBC_TUNABLES=glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096 + + $(objpfx)tst-debug1: $(libdl) + $(objpfx)tst-debug1.out: $(objpfx)tst-debug1mod1.so +diff --git a/elf/dl-load.c b/elf/dl-load.c +index e39980fb19..71867e7c1a 100644 +--- a/elf/dl-load.c ++++ b/elf/dl-load.c +@@ -855,10 +855,12 @@ lose (int code, int fd, const char *name, char *realname, struct link_map *l, + + /* Process PT_GNU_PROPERTY program header PH in module L after + PT_LOAD segments are mapped. Only one NT_GNU_PROPERTY_TYPE_0 +- note is handled which contains processor specific properties. */ ++ note is handled which contains processor specific properties. 
++ FD is -1 for the kernel mapped main executable otherwise it is ++ the fd used for loading module L. */ + + void +-_dl_process_pt_gnu_property (struct link_map *l, const ElfW(Phdr) *ph) ++_dl_process_pt_gnu_property (struct link_map *l, int fd, const ElfW(Phdr) *ph) + { + const ElfW(Nhdr) *note = (const void *) (ph->p_vaddr + l->l_addr); + const ElfW(Addr) size = ph->p_memsz; +@@ -905,7 +907,7 @@ _dl_process_pt_gnu_property (struct link_map *l, const ElfW(Phdr) *ph) + last_type = type; + + /* Target specific property processing. */ +- if (_dl_process_gnu_property (l, type, datasz, ptr) == 0) ++ if (_dl_process_gnu_property (l, fd, type, datasz, ptr) == 0) + return; + + /* Check the next property item. */ +@@ -1251,21 +1253,6 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd, + maplength, has_holes, loader); + if (__glibc_unlikely (errstring != NULL)) + goto call_lose; +- +- /* Process program headers again after load segments are mapped in +- case processing requires accessing those segments. Scan program +- headers backward so that PT_NOTE can be skipped if PT_GNU_PROPERTY +- exits. */ +- for (ph = &phdr[l->l_phnum]; ph != phdr; --ph) +- switch (ph[-1].p_type) +- { +- case PT_NOTE: +- _dl_process_pt_note (l, &ph[-1]); +- break; +- case PT_GNU_PROPERTY: +- _dl_process_pt_gnu_property (l, &ph[-1]); +- break; +- } + } + + if (l->l_ld == 0) +@@ -1377,6 +1364,21 @@ cannot enable executable stack as shared object requires"); + if (l->l_tls_initimage != NULL) + l->l_tls_initimage = (char *) l->l_tls_initimage + l->l_addr; + ++ /* Process program headers again after load segments are mapped in ++ case processing requires accessing those segments. Scan program ++ headers backward so that PT_NOTE can be skipped if PT_GNU_PROPERTY ++ exits. 
*/ ++ for (ph = &l->l_phdr[l->l_phnum]; ph != l->l_phdr; --ph) ++ switch (ph[-1].p_type) ++ { ++ case PT_NOTE: ++ _dl_process_pt_note (l, fd, &ph[-1]); ++ break; ++ case PT_GNU_PROPERTY: ++ _dl_process_pt_gnu_property (l, fd, &ph[-1]); ++ break; ++ } ++ + /* We are done mapping in the file. We no longer need the descriptor. */ + if (__glibc_unlikely (__close_nocancel (fd) != 0)) + { +diff --git a/elf/dl-open.c b/elf/dl-open.c +index 8769e47051..55b39e1bbe 100644 +--- a/elf/dl-open.c ++++ b/elf/dl-open.c +@@ -887,7 +887,7 @@ no more namespaces available for dlmopen()")); + /* Avoid keeping around a dangling reference to the libc.so link + map in case it has been cached in libc_map. */ + if (!args.libc_already_loaded) +- GL(dl_ns)[nsid].libc_map = NULL; ++ GL(dl_ns)[args.nsid].libc_map = NULL; + + /* Remove the object from memory. It may be in an inconsistent + state if relocation failed, for example. */ +diff --git a/elf/dl-tunables.c b/elf/dl-tunables.c +index 26e6e26612..15b29bcb90 100644 +--- a/elf/dl-tunables.c ++++ b/elf/dl-tunables.c +@@ -177,6 +177,7 @@ parse_tunables (char *tunestr, char *valstring) + return; + + char *p = tunestr; ++ size_t off = 0; + + while (true) + { +@@ -190,7 +191,11 @@ parse_tunables (char *tunestr, char *valstring) + /* If we reach the end of the string before getting a valid name-value + pair, bail out. */ + if (p[len] == '\0') +- return; ++ { ++ if (__libc_enable_secure) ++ tunestr[off] = '\0'; ++ return; ++ } + + /* We did not find a valid name-value pair before encountering the + colon. */ +@@ -216,35 +221,28 @@ parse_tunables (char *tunestr, char *valstring) + + if (tunable_is_name (cur->name, name)) + { +- /* If we are in a secure context (AT_SECURE) then ignore the tunable +- unless it is explicitly marked as secure. Tunable values take +- precedence over their envvar aliases. */ ++ /* If we are in a secure context (AT_SECURE) then ignore the ++ tunable unless it is explicitly marked as secure. 
Tunable ++ values take precedence over their envvar aliases. We write ++ the tunables that are not SXID_ERASE back to TUNESTR, thus ++ dropping all SXID_ERASE tunables and any invalid or ++ unrecognized tunables. */ + if (__libc_enable_secure) + { +- if (cur->security_level == TUNABLE_SECLEVEL_SXID_ERASE) ++ if (cur->security_level != TUNABLE_SECLEVEL_SXID_ERASE) + { +- if (p[len] == '\0') +- { +- /* Last tunable in the valstring. Null-terminate and +- return. */ +- *name = '\0'; +- return; +- } +- else +- { +- /* Remove the current tunable from the string. We do +- this by overwriting the string starting from NAME +- (which is where the current tunable begins) with +- the remainder of the string. We then have P point +- to NAME so that we continue in the correct +- position in the valstring. */ +- char *q = &p[len + 1]; +- p = name; +- while (*q != '\0') +- *name++ = *q++; +- name[0] = '\0'; +- len = 0; +- } ++ if (off > 0) ++ tunestr[off++] = ':'; ++ ++ const char *n = cur->name; ++ ++ while (*n != '\0') ++ tunestr[off++] = *n++; ++ ++ tunestr[off++] = '='; ++ ++ for (size_t j = 0; j < len; j++) ++ tunestr[off++] = value[j]; + } + + if (cur->security_level != TUNABLE_SECLEVEL_NONE) +@@ -257,9 +255,7 @@ parse_tunables (char *tunestr, char *valstring) + } + } + +- if (p[len] == '\0') +- return; +- else ++ if (p[len] != '\0') + p += len + 1; + } + } +diff --git a/elf/ifuncmain6pie.c b/elf/ifuncmain6pie.c +index 04faeb86ef..4a01906836 100644 +--- a/elf/ifuncmain6pie.c ++++ b/elf/ifuncmain6pie.c +@@ -9,7 +9,6 @@ + #include "ifunc-sel.h" + + typedef int (*foo_p) (void); +-extern foo_p foo_ptr; + + static int + one (void) +@@ -28,20 +27,17 @@ foo_ifunc (void) + } + + extern int foo (void); +-extern foo_p get_foo (void); ++extern int call_foo (void); + extern foo_p get_foo_p (void); + +-foo_p my_foo_ptr = foo; ++foo_p foo_ptr = foo; + + int + main (void) + { + foo_p p; + +- p = get_foo (); +- if (p != foo) +- abort (); +- if ((*p) () != -30) ++ if (call_foo () != -30) + 
abort (); + + p = get_foo_p (); +@@ -52,12 +48,8 @@ main (void) + + if (foo_ptr != foo) + abort (); +- if (my_foo_ptr != foo) +- abort (); + if ((*foo_ptr) () != -30) + abort (); +- if ((*my_foo_ptr) () != -30) +- abort (); + if (foo () != -30) + abort (); + +diff --git a/elf/ifuncmod6.c b/elf/ifuncmod6.c +index 2e16c1d06d..2f6d0715e6 100644 +--- a/elf/ifuncmod6.c ++++ b/elf/ifuncmod6.c +@@ -4,7 +4,7 @@ extern int foo (void); + + typedef int (*foo_p) (void); + +-foo_p foo_ptr = foo; ++extern foo_p foo_ptr; + + foo_p + get_foo_p (void) +@@ -12,8 +12,8 @@ get_foo_p (void) + return foo_ptr; + } + +-foo_p +-get_foo (void) ++int ++call_foo (void) + { +- return foo; ++ return foo (); + } +diff --git a/elf/rtld.c b/elf/rtld.c +index 5b882163fa..14a42ed00a 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -1534,10 +1534,10 @@ of this helper program; chances are you did not intend to run this program.\n\ + switch (ph[-1].p_type) + { + case PT_NOTE: +- _dl_process_pt_note (main_map, &ph[-1]); ++ _dl_process_pt_note (main_map, -1, &ph[-1]); + break; + case PT_GNU_PROPERTY: +- _dl_process_pt_gnu_property (main_map, &ph[-1]); ++ _dl_process_pt_gnu_property (main_map, -1, &ph[-1]); + break; + } + +diff --git a/elf/tst-env-setuid-tunables.c b/elf/tst-env-setuid-tunables.c +index 971d5892b1..ca0c8c245c 100644 +--- a/elf/tst-env-setuid-tunables.c ++++ b/elf/tst-env-setuid-tunables.c +@@ -25,35 +25,76 @@ + #include "config.h" + #undef _LIBC + +-#define test_parent test_parent_tunables +-#define test_child test_child_tunables +- +-static int test_child_tunables (void); +-static int test_parent_tunables (void); +- +-#include "tst-env-setuid.c" +- +-#define CHILD_VALSTRING_VALUE "glibc.malloc.mmap_threshold=4096" +-#define PARENT_VALSTRING_VALUE \ +- "glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096" ++#include <errno.h> ++#include <fcntl.h> ++#include <stdlib.h> ++#include <stdint.h> ++#include <stdio.h> ++#include <string.h> ++#include <sys/stat.h> ++#include <sys/wait.h> 
++#include <unistd.h> ++#include <intprops.h> ++#include <array_length.h> ++ ++#include <support/check.h> ++#include <support/support.h> ++#include <support/test-driver.h> ++#include <support/capture_subprocess.h> ++ ++const char *teststrings[] = ++{ ++ "glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096", ++ "glibc.malloc.check=2:glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096", ++ "glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096:glibc.malloc.check=2", ++ "glibc.malloc.perturb=0x800", ++ "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096", ++ "glibc.malloc.perturb=0x800:not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096", ++ "glibc.not_valid.check=2:glibc.malloc.mmap_threshold=4096", ++ "not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096", ++ "glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096:glibc.malloc.check=2", ++ "glibc.malloc.check=4:glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096", ++ ":glibc.malloc.garbage=2:glibc.malloc.check=1", ++ "glibc.malloc.check=1:glibc.malloc.check=2", ++ "not_valid.malloc.check=2", ++ "glibc.not_valid.check=2", ++}; ++ ++const char *resultstrings[] = ++{ ++ "glibc.malloc.mmap_threshold=4096", ++ "glibc.malloc.mmap_threshold=4096", ++ "glibc.malloc.mmap_threshold=4096", ++ "glibc.malloc.perturb=0x800", ++ "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096", ++ "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096", ++ "glibc.malloc.mmap_threshold=4096", ++ "glibc.malloc.mmap_threshold=4096", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++}; + + static int +-test_child_tunables (void) ++test_child (int off) + { + const char *val = getenv ("GLIBC_TUNABLES"); + + #if HAVE_TUNABLES +- if (val != NULL && strcmp (val, CHILD_VALSTRING_VALUE) == 0) ++ if (val != NULL && strcmp (val, resultstrings[off]) == 0) + return 0; + + if (val != NULL) +- printf ("Unexpected GLIBC_TUNABLES VALUE %s\n", val); ++ printf ("[%d] Unexpected GLIBC_TUNABLES VALUE %s\n", off, val); + + return 1; 
+ #else + if (val != NULL) + { +- printf ("GLIBC_TUNABLES not cleared\n"); ++ printf ("[%d] GLIBC_TUNABLES not cleared\n", off); + return 1; + } + return 0; +@@ -61,15 +102,48 @@ test_child_tunables (void) + } + + static int +-test_parent_tunables (void) ++do_test (int argc, char **argv) + { +- const char *val = getenv ("GLIBC_TUNABLES"); ++ /* Setgid child process. */ ++ if (argc == 2) ++ { ++ if (getgid () == getegid ()) ++ /* This can happen if the file system is mounted nosuid. */ ++ FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n", ++ (intmax_t) getgid ()); + +- if (val != NULL && strcmp (val, PARENT_VALSTRING_VALUE) == 0) +- return 0; ++ int ret = test_child (atoi (argv[1])); + +- if (val != NULL) +- printf ("Unexpected GLIBC_TUNABLES VALUE %s\n", val); ++ if (ret != 0) ++ exit (1); + +- return 1; ++ exit (EXIT_SUCCESS); ++ } ++ else ++ { ++ int ret = 0; ++ ++ /* Spawn tests. */ ++ for (int i = 0; i < array_length (teststrings); i++) ++ { ++ char buf[INT_BUFSIZE_BOUND (int)]; ++ ++ printf ("Spawned test for %s (%d)\n", teststrings[i], i); ++ snprintf (buf, sizeof (buf), "%d\n", i); ++ if (setenv ("GLIBC_TUNABLES", teststrings[i], 1) != 0) ++ exit (1); ++ ++ int status = support_capture_subprogram_self_sgid (buf); ++ ++ /* Bail out early if unsupported. 
*/ ++ if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) ++ return EXIT_UNSUPPORTED; ++ ++ ret |= status; ++ } ++ return ret; ++ } + } ++ ++#define TEST_FUNCTION_ARGV do_test ++#include <support/test-driver.c> +diff --git a/elf/tst-env-setuid.c b/elf/tst-env-setuid.c +index 41dc79e83a..2dbccdb69e 100644 +--- a/elf/tst-env-setuid.c ++++ b/elf/tst-env-setuid.c +@@ -29,173 +29,12 @@ + #include <sys/wait.h> + #include <unistd.h> + ++#include <support/check.h> + #include <support/support.h> + #include <support/test-driver.h> ++#include <support/capture_subprocess.h> + + static char SETGID_CHILD[] = "setgid-child"; +-#define CHILD_STATUS 42 +- +-/* Return a GID which is not our current GID, but is present in the +- supplementary group list. */ +-static gid_t +-choose_gid (void) +-{ +- const int count = 64; +- gid_t groups[count]; +- int ret = getgroups (count, groups); +- if (ret < 0) +- { +- printf ("getgroups: %m\n"); +- exit (1); +- } +- gid_t current = getgid (); +- for (int i = 0; i < ret; ++i) +- { +- if (groups[i] != current) +- return groups[i]; +- } +- return 0; +-} +- +-/* Spawn and execute a program and verify that it returns the CHILD_STATUS. */ +-static pid_t +-do_execve (char **args) +-{ +- pid_t kid = vfork (); +- +- if (kid < 0) +- { +- printf ("vfork: %m\n"); +- return -1; +- } +- +- if (kid == 0) +- { +- /* Child process. */ +- execve (args[0], args, environ); +- _exit (-errno); +- } +- +- if (kid < 0) +- return 1; +- +- int status; +- +- if (waitpid (kid, &status, 0) < 0) +- { +- printf ("waitpid: %m\n"); +- return 1; +- } +- +- if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) +- return EXIT_UNSUPPORTED; +- +- if (!WIFEXITED (status) || WEXITSTATUS (status) != CHILD_STATUS) +- { +- printf ("Unexpected exit status %d from child process\n", +- WEXITSTATUS (status)); +- return 1; +- } +- return 0; +-} +- +-/* Copies the executable into a restricted directory, so that we can +- safely make it SGID with the TARGET group ID. Then runs the +- executable. 
*/ +-static int +-run_executable_sgid (gid_t target) +-{ +- char *dirname = xasprintf ("%s/tst-tunables-setuid.%jd", +- test_dir, (intmax_t) getpid ()); +- char *execname = xasprintf ("%s/bin", dirname); +- int infd = -1; +- int outfd = -1; +- int ret = 0; +- if (mkdir (dirname, 0700) < 0) +- { +- printf ("mkdir: %m\n"); +- goto err; +- } +- infd = open ("/proc/self/exe", O_RDONLY); +- if (infd < 0) +- { +- printf ("open (/proc/self/exe): %m\n"); +- goto err; +- } +- outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700); +- if (outfd < 0) +- { +- printf ("open (%s): %m\n", execname); +- goto err; +- } +- char buf[4096]; +- for (;;) +- { +- ssize_t rdcount = read (infd, buf, sizeof (buf)); +- if (rdcount < 0) +- { +- printf ("read: %m\n"); +- goto err; +- } +- if (rdcount == 0) +- break; +- char *p = buf; +- char *end = buf + rdcount; +- while (p != end) +- { +- ssize_t wrcount = write (outfd, buf, end - p); +- if (wrcount == 0) +- errno = ENOSPC; +- if (wrcount <= 0) +- { +- printf ("write: %m\n"); +- goto err; +- } +- p += wrcount; +- } +- } +- if (fchown (outfd, getuid (), target) < 0) +- { +- printf ("fchown (%s): %m\n", execname); +- goto err; +- } +- if (fchmod (outfd, 02750) < 0) +- { +- printf ("fchmod (%s): %m\n", execname); +- goto err; +- } +- if (close (outfd) < 0) +- { +- printf ("close (outfd): %m\n"); +- goto err; +- } +- if (close (infd) < 0) +- { +- printf ("close (infd): %m\n"); +- goto err; +- } +- +- char *args[] = {execname, SETGID_CHILD, NULL}; +- +- ret = do_execve (args); +- +-err: +- if (outfd >= 0) +- close (outfd); +- if (infd >= 0) +- close (infd); +- if (execname) +- { +- unlink (execname); +- free (execname); +- } +- if (dirname) +- { +- rmdir (dirname); +- free (dirname); +- } +- return ret; +-} + + #ifndef test_child + static int +@@ -256,40 +95,32 @@ do_test (int argc, char **argv) + if (argc == 2 && strcmp (argv[1], SETGID_CHILD) == 0) + { + if (getgid () == getegid ()) +- { +- /* This can happen if the file system is mounted 
nosuid. */ +- fprintf (stderr, "SGID failed: GID and EGID match (%jd)\n", +- (intmax_t) getgid ()); +- exit (EXIT_UNSUPPORTED); +- } ++ /* This can happen if the file system is mounted nosuid. */ ++ FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n", ++ (intmax_t) getgid ()); + + int ret = test_child (); + + if (ret != 0) + exit (1); + +- exit (CHILD_STATUS); ++ exit (EXIT_SUCCESS); + } + else + { + if (test_parent () != 0) + exit (1); + +- /* Try running a setgid program. */ +- gid_t target = choose_gid (); +- if (target == 0) +- { +- fprintf (stderr, +- "Could not find a suitable GID for user %jd, skipping test\n", +- (intmax_t) getuid ()); +- exit (0); +- } ++ int status = support_capture_subprogram_self_sgid (SETGID_CHILD); + +- return run_executable_sgid (target); +- } ++ if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) ++ return EXIT_UNSUPPORTED; ++ ++ if (!WIFEXITED (status)) ++ FAIL_EXIT1 ("Unexpected exit status %d from child process\n", status); + +- /* Something went wrong and our argv was corrupted. 
*/ +- _exit (1); ++ return 0; ++ } + } + + #define TEST_FUNCTION_ARGV do_test +diff --git a/iconv/Versions b/iconv/Versions +index 8a5f4cf780..d51af52fa3 100644 +--- a/iconv/Versions ++++ b/iconv/Versions +@@ -6,7 +6,9 @@ libc { + GLIBC_PRIVATE { + # functions shared with iconv program + __gconv_get_alias_db; __gconv_get_cache; __gconv_get_modules_db; +- __gconv_open; __gconv_create_spec; ++ ++ # functions used elsewhere in glibc ++ __gconv_open; __gconv_create_spec; __gconv_destroy_spec; + + # function used by the gconv modules + __gconv_transliterate; +diff --git a/iconv/gconv_charset.c b/iconv/gconv_charset.c +index 6ccd0773cc..4ba0aa99f5 100644 +--- a/iconv/gconv_charset.c ++++ b/iconv/gconv_charset.c +@@ -216,3 +216,13 @@ out: + return ret; + } + libc_hidden_def (__gconv_create_spec) ++ ++ ++void ++__gconv_destroy_spec (struct gconv_spec *conv_spec) ++{ ++ free (conv_spec->fromcode); ++ free (conv_spec->tocode); ++ return; ++} ++libc_hidden_def (__gconv_destroy_spec) +diff --git a/iconv/gconv_charset.h b/iconv/gconv_charset.h +index b39b09aea1..e9c122cf7e 100644 +--- a/iconv/gconv_charset.h ++++ b/iconv/gconv_charset.h +@@ -48,33 +48,6 @@ + #define GCONV_IGNORE_ERRORS_SUFFIX "IGNORE" + + +-/* This function accepts the charset names of the source and destination of the +- conversion and populates *conv_spec with an equivalent conversion +- specification that may later be used by __gconv_open. The charset names +- might contain options in the form of suffixes that alter the conversion, +- e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring +- and truncating any suffix options in fromcode, and processing and truncating +- any suffix options in tocode. Supported suffix options ("TRANSLIT" or +- "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec +- to be set to true. Unrecognized suffix options are silently discarded. If +- the function succeeds, it returns conv_spec back to the caller. It returns +- NULL upon failure. 
*/ +-struct gconv_spec * +-__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode, +- const char *tocode); +-libc_hidden_proto (__gconv_create_spec) +- +- +-/* This function frees all heap memory allocated by __gconv_create_spec. */ +-static void __attribute__ ((unused)) +-gconv_destroy_spec (struct gconv_spec *conv_spec) +-{ +- free (conv_spec->fromcode); +- free (conv_spec->tocode); +- return; +-} +- +- + /* This function copies in-order, characters from the source 's' that are + either alpha-numeric or one in one of these: "_-.,:/" - into the destination + 'wp' while dropping all other characters. In the process, it converts all +diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h +index e86938dae7..f721ce30ff 100644 +--- a/iconv/gconv_int.h ++++ b/iconv/gconv_int.h +@@ -152,6 +152,27 @@ extern int __gconv_open (struct gconv_spec *conv_spec, + __gconv_t *handle, int flags); + libc_hidden_proto (__gconv_open) + ++/* This function accepts the charset names of the source and destination of the ++ conversion and populates *conv_spec with an equivalent conversion ++ specification that may later be used by __gconv_open. The charset names ++ might contain options in the form of suffixes that alter the conversion, ++ e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring ++ and truncating any suffix options in fromcode, and processing and truncating ++ any suffix options in tocode. Supported suffix options ("TRANSLIT" or ++ "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec ++ to be set to true. Unrecognized suffix options are silently discarded. If ++ the function succeeds, it returns conv_spec back to the caller. It returns ++ NULL upon failure. */ ++extern struct gconv_spec * ++__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode, ++ const char *tocode); ++libc_hidden_proto (__gconv_create_spec) ++ ++/* This function frees all heap memory allocated by __gconv_create_spec. 
*/ ++extern void ++__gconv_destroy_spec (struct gconv_spec *conv_spec); ++libc_hidden_proto (__gconv_destroy_spec) ++ + /* Free resources associated with transformation descriptor CD. */ + extern int __gconv_close (__gconv_t cd) + attribute_hidden; +diff --git a/iconv/iconv_open.c b/iconv/iconv_open.c +index dd54bc12e0..5b30055c04 100644 +--- a/iconv/iconv_open.c ++++ b/iconv/iconv_open.c +@@ -39,7 +39,7 @@ iconv_open (const char *tocode, const char *fromcode) + + int res = __gconv_open (&conv_spec, &cd, 0); + +- gconv_destroy_spec (&conv_spec); ++ __gconv_destroy_spec (&conv_spec); + + if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) + { +diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c +index b4334faa57..d59979759c 100644 +--- a/iconv/iconv_prog.c ++++ b/iconv/iconv_prog.c +@@ -184,7 +184,7 @@ main (int argc, char *argv[]) + /* Let's see whether we have these coded character sets. */ + res = __gconv_open (&conv_spec, &cd, 0); + +- gconv_destroy_spec (&conv_spec); ++ __gconv_destroy_spec (&conv_spec); + + if (res != __GCONV_OK) + { +diff --git a/iconv/tst-iconv_prog.sh b/iconv/tst-iconv_prog.sh +index 8298136b7f..d8db7b335c 100644 +--- a/iconv/tst-iconv_prog.sh ++++ b/iconv/tst-iconv_prog.sh +@@ -102,12 +102,16 @@ hangarray=( + "\x00\x80;-c;IBM1161;UTF-8//TRANSLIT//IGNORE" + "\x00\xdb;-c;IBM1162;UTF-8//TRANSLIT//IGNORE" + "\x00\x70;-c;IBM12712;UTF-8//TRANSLIT//IGNORE" +-# These are known hangs that are yet to be fixed: +-# "\x00\x0f;-c;IBM1364;UTF-8" +-# "\x00\x0f;-c;IBM1371;UTF-8" +-# "\x00\x0f;-c;IBM1388;UTF-8" +-# "\x00\x0f;-c;IBM1390;UTF-8" +-# "\x00\x0f;-c;IBM1399;UTF-8" ++"\x00\x0f;-c;IBM1364;UTF-8" ++"\x0e\x0e;-c;IBM1364;UTF-8" ++"\x00\x0f;-c;IBM1371;UTF-8" ++"\x0e\x0e;-c;IBM1371;UTF-8" ++"\x00\x0f;-c;IBM1388;UTF-8" ++"\x0e\x0e;-c;IBM1388;UTF-8" ++"\x00\x0f;-c;IBM1390;UTF-8" ++"\x0e\x0e;-c;IBM1390;UTF-8" ++"\x00\x0f;-c;IBM1399;UTF-8" ++"\x0e\x0e;-c;IBM1399;UTF-8" + "\x00\x53;-c;IBM16804;UTF-8//TRANSLIT//IGNORE" + 
"\x00\x41;-c;IBM274;UTF-8//TRANSLIT//IGNORE" + "\x00\x41;-c;IBM275;UTF-8//TRANSLIT//IGNORE" +diff --git a/iconvdata/Makefile b/iconvdata/Makefile +index 4ec2741cdc..b67b4feeb4 100644 +--- a/iconvdata/Makefile ++++ b/iconvdata/Makefile +@@ -1,4 +1,5 @@ + # Copyright (C) 1997-2020 Free Software Foundation, Inc. ++# Copyright (C) The GNU Toolchain Authors. + # This file is part of the GNU C Library. + + # The GNU C Library is free software; you can redistribute it and/or +@@ -73,7 +74,8 @@ modules.so := $(addsuffix .so, $(modules)) + ifeq (yes,$(build-shared)) + tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \ + tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \ +- bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 ++ bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \ ++ bug-iconv13 bug-iconv14 bug-iconv15 + ifeq ($(have-thread-library),yes) + tests += bug-iconv3 + endif +@@ -321,6 +323,10 @@ $(objpfx)bug-iconv10.out: $(objpfx)gconv-modules \ + $(addprefix $(objpfx),$(modules.so)) + $(objpfx)bug-iconv12.out: $(objpfx)gconv-modules \ + $(addprefix $(objpfx),$(modules.so)) ++$(objpfx)bug-iconv14.out: $(objpfx)gconv-modules \ ++ $(addprefix $(objpfx),$(modules.so)) ++$(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \ ++ $(addprefix $(objpfx),$(modules.so)) + + $(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \ + $(addprefix $(objpfx),$(modules.so)) \ +diff --git a/iconvdata/bug-iconv13.c b/iconvdata/bug-iconv13.c +new file mode 100644 +index 0000000000..87aaff398e +--- /dev/null ++++ b/iconvdata/bug-iconv13.c +@@ -0,0 +1,53 @@ ++/* bug 24973: Test EUC-KR module ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <errno.h> ++#include <iconv.h> ++#include <stdio.h> ++#include <support/check.h> ++ ++static int ++do_test (void) ++{ ++ iconv_t cd = iconv_open ("UTF-8//IGNORE", "EUC-KR"); ++ TEST_VERIFY_EXIT (cd != (iconv_t) -1); ++ ++ /* 0xfe (->0x7e : row 94) and 0xc9 (->0x49 : row 41) are user-defined ++ areas, which are not allowed and should be skipped over due to ++ //IGNORE. The trailing 0xfe also is an incomplete sequence, which ++ should be checked first. */ ++ char input[4] = { '\xc9', '\xa1', '\0', '\xfe' }; ++ char *inptr = input; ++ size_t insize = sizeof (input); ++ char output[4]; ++ char *outptr = output; ++ size_t outsize = sizeof (output); ++ ++ /* This used to crash due to buffer overrun. */ ++ TEST_VERIFY (iconv (cd, &inptr, &insize, &outptr, &outsize) == (size_t) -1); ++ TEST_VERIFY (errno == EINVAL); ++ /* The conversion should produce one character, the converted null ++ character. 
*/ ++ TEST_VERIFY (sizeof (output) - outsize == 1); ++ ++ TEST_VERIFY_EXIT (iconv_close (cd) != -1); ++ ++ return 0; ++} ++ ++#include <support/test-driver.c> +diff --git a/iconvdata/bug-iconv14.c b/iconvdata/bug-iconv14.c +new file mode 100644 +index 0000000000..902f140fa9 +--- /dev/null ++++ b/iconvdata/bug-iconv14.c +@@ -0,0 +1,127 @@ ++/* Assertion in ISO-2022-JP-3 due to two-character sequence (bug 27256). ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <iconv.h> ++#include <string.h> ++#include <errno.h> ++#include <support/check.h> ++ ++/* Use an escape sequence to return to the initial state. */ ++static void ++with_escape_sequence (void) ++{ ++ iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3"); ++ TEST_VERIFY_EXIT (c != (iconv_t) -1); ++ ++ char in[] = "\e$(O+D\e(B"; ++ char *inbuf = in; ++ size_t inleft = strlen (in); ++ char out[3]; /* Space for one output character. 
*/ ++ char *outbuf; ++ size_t outleft; ++ ++ outbuf = out; ++ outleft = sizeof (out); ++ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1); ++ TEST_COMPARE (errno, E2BIG); ++ TEST_COMPARE (inleft, 3); ++ TEST_COMPARE (inbuf - in, strlen (in) - 3); ++ TEST_COMPARE (outleft, sizeof (out) - 2); ++ TEST_COMPARE (outbuf - out, 2); ++ TEST_COMPARE (out[0] & 0xff, 0xc3); ++ TEST_COMPARE (out[1] & 0xff, 0xa6); ++ ++ /* Return to the initial shift state, producing the pending ++ character. */ ++ outbuf = out; ++ outleft = sizeof (out); ++ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), 0); ++ TEST_COMPARE (inleft, 0); ++ TEST_COMPARE (inbuf - in, strlen (in)); ++ TEST_COMPARE (outleft, sizeof (out) - 2); ++ TEST_COMPARE (outbuf - out, 2); ++ TEST_COMPARE (out[0] & 0xff, 0xcc); ++ TEST_COMPARE (out[1] & 0xff, 0x80); ++ ++ /* Nothing should be flushed the second time. */ ++ outbuf = out; ++ outleft = sizeof (out); ++ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0); ++ TEST_COMPARE (outleft, sizeof (out)); ++ TEST_COMPARE (outbuf - out, 0); ++ TEST_COMPARE (out[0] & 0xff, 0xcc); ++ TEST_COMPARE (out[1] & 0xff, 0x80); ++ ++ TEST_COMPARE (iconv_close (c), 0); ++} ++ ++/* Use an explicit flush to return to the initial state. */ ++static void ++with_flush (void) ++{ ++ iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3"); ++ TEST_VERIFY_EXIT (c != (iconv_t) -1); ++ ++ char in[] = "\e$(O+D"; ++ char *inbuf = in; ++ size_t inleft = strlen (in); ++ char out[3]; /* Space for one output character. 
*/ ++ char *outbuf; ++ size_t outleft; ++ ++ outbuf = out; ++ outleft = sizeof (out); ++ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1); ++ TEST_COMPARE (errno, E2BIG); ++ TEST_COMPARE (inleft, 0); ++ TEST_COMPARE (inbuf - in, strlen (in)); ++ TEST_COMPARE (outleft, sizeof (out) - 2); ++ TEST_COMPARE (outbuf - out, 2); ++ TEST_COMPARE (out[0] & 0xff, 0xc3); ++ TEST_COMPARE (out[1] & 0xff, 0xa6); ++ ++ /* Flush the pending character. */ ++ outbuf = out; ++ outleft = sizeof (out); ++ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0); ++ TEST_COMPARE (outleft, sizeof (out) - 2); ++ TEST_COMPARE (outbuf - out, 2); ++ TEST_COMPARE (out[0] & 0xff, 0xcc); ++ TEST_COMPARE (out[1] & 0xff, 0x80); ++ ++ /* Nothing should be flushed the second time. */ ++ outbuf = out; ++ outleft = sizeof (out); ++ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0); ++ TEST_COMPARE (outleft, sizeof (out)); ++ TEST_COMPARE (outbuf - out, 0); ++ TEST_COMPARE (out[0] & 0xff, 0xcc); ++ TEST_COMPARE (out[1] & 0xff, 0x80); ++ ++ TEST_COMPARE (iconv_close (c), 0); ++} ++ ++static int ++do_test (void) ++{ ++ with_escape_sequence (); ++ with_flush (); ++ return 0; ++} ++ ++#include <support/test-driver.c> +diff --git a/iconvdata/bug-iconv15.c b/iconvdata/bug-iconv15.c +new file mode 100644 +index 0000000000..cc04bd0313 +--- /dev/null ++++ b/iconvdata/bug-iconv15.c +@@ -0,0 +1,60 @@ ++/* Bug 28524: Conversion from ISO-2022-JP-3 with iconv ++ may emit spurious NUL character on state reset. ++ Copyright (C) The GNU Toolchain Authors. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <stddef.h> ++#include <iconv.h> ++#include <support/check.h> ++ ++static int ++do_test (void) ++{ ++ char in[] = "\x1b(I"; ++ char *inbuf = in; ++ size_t inleft = sizeof (in) - 1; ++ char out[1]; ++ char *outbuf = out; ++ size_t outleft = sizeof (out); ++ iconv_t cd; ++ ++ cd = iconv_open ("UTF8", "ISO-2022-JP-3"); ++ TEST_VERIFY_EXIT (cd != (iconv_t) -1); ++ ++ /* First call to iconv should alter internal state. ++ Now, JISX0201_Kana_set is selected and ++ state value != ASCII_set. */ ++ TEST_VERIFY (iconv (cd, &inbuf, &inleft, &outbuf, &outleft) != (size_t) -1); ++ ++ /* No bytes should have been added to ++ the output buffer at this point. */ ++ TEST_VERIFY (outbuf == out); ++ TEST_VERIFY (outleft == sizeof (out)); ++ ++ /* Second call shall emit spurious NUL character in unpatched glibc. */ ++ TEST_VERIFY (iconv (cd, NULL, NULL, &outbuf, &outleft) != (size_t) -1); ++ ++ /* No characters are expected to be produced. */ ++ TEST_VERIFY (outbuf == out); ++ TEST_VERIFY (outleft == sizeof (out)); ++ ++ TEST_VERIFY_EXIT (iconv_close (cd) != -1); ++ ++ return 0; ++} ++ ++#include <support/test-driver.c> +diff --git a/iconvdata/euc-kr.c b/iconvdata/euc-kr.c +index b0d56cf3ee..1045bae926 100644 +--- a/iconvdata/euc-kr.c ++++ b/iconvdata/euc-kr.c +@@ -80,11 +80,7 @@ euckr_from_ucs4 (uint32_t ch, unsigned char *cp) + \ + if (ch <= 0x9f) \ + ++inptr; \ +- /* 0xfe(->0x7e : row 94) and 0xc9(->0x59 : row 41) are \ +- user-defined areas. 
*/ \ +- else if (__builtin_expect (ch == 0xa0, 0) \ +- || __builtin_expect (ch > 0xfe, 0) \ +- || __builtin_expect (ch == 0xc9, 0)) \ ++ else if (__glibc_unlikely (ch == 0xa0)) \ + { \ + /* This is illegal. */ \ + STANDARD_FROM_LOOP_ERR_HANDLER (1); \ +diff --git a/iconvdata/ibm1364.c b/iconvdata/ibm1364.c +index 49e7267ab4..521f0825b7 100644 +--- a/iconvdata/ibm1364.c ++++ b/iconvdata/ibm1364.c +@@ -158,24 +158,14 @@ enum + \ + if (__builtin_expect (ch, 0) == SO) \ + { \ +- /* Shift OUT, change to DBCS converter. */ \ +- if (curcs == db) \ +- { \ +- result = __GCONV_ILLEGAL_INPUT; \ +- break; \ +- } \ ++ /* Shift OUT, change to DBCS converter (redundant escape okay). */ \ + curcs = db; \ + ++inptr; \ + continue; \ + } \ + if (__builtin_expect (ch, 0) == SI) \ + { \ +- /* Shift IN, change to SBCS converter. */ \ +- if (curcs == sb) \ +- { \ +- result = __GCONV_ILLEGAL_INPUT; \ +- break; \ +- } \ ++ /* Shift IN, change to SBCS converter (redundant escape okay). */ \ + curcs = sb; \ + ++inptr; \ + continue; \ +diff --git a/iconvdata/iso-2022-jp-3.c b/iconvdata/iso-2022-jp-3.c +index 8c3b7e627e..c7b470db61 100644 +--- a/iconvdata/iso-2022-jp-3.c ++++ b/iconvdata/iso-2022-jp-3.c +@@ -1,5 +1,6 @@ + /* Conversion module for ISO-2022-JP-3. + Copyright (C) 1998-2020 Free Software Foundation, Inc. ++ Copyright (C) The GNU Toolchain Authors. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998, + and Bruno Haible <bruno@clisp.org>, 2002. +@@ -67,10 +68,15 @@ enum + CURRENT_SEL_MASK = 7 << 3 + }; + +-/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the state +- also contains the last two bytes to be output, shifted by 6 bits, and a +- one-bit indicator whether they must be preceded by the shift sequence, +- in bit 22. 
*/ ++/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the ++ state also contains the last two bytes to be output, shifted by 6 ++ bits, and a one-bit indicator whether they must be preceded by the ++ shift sequence, in bit 22. During ISO-2022-JP-3 to UCS-4 ++ conversion, COUNT may also contain a non-zero pending wide ++ character, shifted by six bits. This happens for certain inputs in ++ JISX0213_1_2004_set and JISX0213_2_set if the second wide character ++ in a combining sequence cannot be written because the buffer is ++ full. */ + + /* Since this is a stateful encoding we have to provide code which resets + the output state to the initial state. This has to be done during the +@@ -80,10 +86,27 @@ enum + { \ + if (FROM_DIRECTION) \ + { \ +- /* It's easy, we don't have to emit anything, we just reset the \ +- state for the input. */ \ +- data->__statep->__count &= 7; \ +- data->__statep->__count |= ASCII_set; \ ++ uint32_t ch = data->__statep->__count >> 6; \ ++ \ ++ if (__glibc_unlikely (ch != 0)) \ ++ { \ ++ if (__glibc_likely (outbuf + 4 <= outend)) \ ++ { \ ++ /* Write out the last character. */ \ ++ put32u (outbuf, ch); \ ++ outbuf += 4; \ ++ data->__statep->__count &= 7; \ ++ data->__statep->__count |= ASCII_set; \ ++ } \ ++ else \ ++ /* We don't have enough room in the output buffer. */ \ ++ status = __GCONV_FULL_OUTPUT; \ ++ } \ ++ else \ ++ { \ ++ data->__statep->__count &= 7; \ ++ data->__statep->__count |= ASCII_set; \ ++ } \ + } \ + else \ + { \ +@@ -151,7 +174,21 @@ enum + #define LOOPFCT FROM_LOOP + #define BODY \ + { \ +- uint32_t ch = *inptr; \ ++ uint32_t ch; \ ++ \ ++ /* Output any pending character. */ \ ++ ch = set >> 6; \ ++ if (__glibc_unlikely (ch != 0)) \ ++ { \ ++ put32 (outptr, ch); \ ++ outptr += 4; \ ++ /* Remove the pending character, but preserve state bits. */ \ ++ set &= (1 << 6) - 1; \ ++ continue; \ ++ } \ ++ \ ++ /* Otherwise read the next input byte. */ \ ++ ch = *inptr; \ + \ + /* Recognize escape sequences. 
*/ \ + if (__glibc_unlikely (ch == ESC)) \ +@@ -297,21 +334,25 @@ enum + uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \ + uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \ + \ ++ inptr += 2; \ ++ \ ++ put32 (outptr, u1); \ ++ outptr += 4; \ ++ \ + /* See whether we have room for two characters. */ \ +- if (outptr + 8 <= outend) \ ++ if (outptr + 4 <= outend) \ + { \ +- inptr += 2; \ +- put32 (outptr, u1); \ +- outptr += 4; \ + put32 (outptr, u2); \ + outptr += 4; \ + continue; \ + } \ +- else \ +- { \ +- result = __GCONV_FULL_OUTPUT; \ +- break; \ +- } \ ++ \ ++ /* Otherwise store only the first character now, and \ ++ put the second one into the queue. */ \ ++ set |= u2 << 6; \ ++ /* Tell the caller why we terminate the loop. */ \ ++ result = __GCONV_FULL_OUTPUT; \ ++ break; \ + } \ + \ + inptr += 2; \ +diff --git a/iconvdata/ksc5601.h b/iconvdata/ksc5601.h +index d3eb3a4ff8..f5cdc72797 100644 +--- a/iconvdata/ksc5601.h ++++ b/iconvdata/ksc5601.h +@@ -50,15 +50,15 @@ ksc5601_to_ucs4 (const unsigned char **s, size_t avail, unsigned char offset) + unsigned char ch2; + int idx; + ++ if (avail < 2) ++ return 0; ++ + /* row 94(0x7e) and row 41(0x49) are user-defined area in KS C 5601 */ + + if (ch < offset || (ch - offset) <= 0x20 || (ch - offset) >= 0x7e + || (ch - offset) == 0x49) + return __UNKNOWN_10646_CHAR; + +- if (avail < 2) +- return 0; +- + ch2 = (*s)[1]; + if (ch2 < offset || (ch2 - offset) <= 0x20 || (ch2 - offset) >= 0x7f) + return __UNKNOWN_10646_CHAR; +diff --git a/intl/dcigettext.c b/intl/dcigettext.c +index 2e7c662bc7..bd332e71da 100644 +--- a/intl/dcigettext.c ++++ b/intl/dcigettext.c +@@ -1120,15 +1120,18 @@ _nl_find_msg (struct loaded_l10nfile *domain_file, + + # ifdef _LIBC + +- struct gconv_spec conv_spec +- = { .fromcode = norm_add_slashes (charset, ""), +- .tocode = norm_add_slashes (outcharset, ""), +- /* We always want to use transliteration. 
*/ +- .translit = true, +- .ignore = false +- }; ++ struct gconv_spec conv_spec; ++ ++ __gconv_create_spec (&conv_spec, charset, outcharset); ++ ++ /* We always want to use transliteration. */ ++ conv_spec.translit = true; ++ + int r = __gconv_open (&conv_spec, &convd->conv, + GCONV_AVOID_NOCONV); ++ ++ __gconv_destroy_spec (&conv_spec); ++ + if (__builtin_expect (r != __GCONV_OK, 0)) + { + /* If the output encoding is the same there is +diff --git a/intl/tst-codeset.c b/intl/tst-codeset.c +index fd70432eca..e9f6e5e09f 100644 +--- a/intl/tst-codeset.c ++++ b/intl/tst-codeset.c +@@ -22,13 +22,11 @@ + #include <stdio.h> + #include <stdlib.h> + #include <string.h> ++#include <support/check.h> + + static int + do_test (void) + { +- char *s; +- int result = 0; +- + unsetenv ("LANGUAGE"); + unsetenv ("OUTPUT_CHARSET"); + setlocale (LC_ALL, "de_DE.ISO-8859-1"); +@@ -36,25 +34,21 @@ do_test (void) + bindtextdomain ("codeset", OBJPFX "domaindir"); + + /* Here we expect output in ISO-8859-1. */ +- s = gettext ("cheese"); +- if (strcmp (s, "K\344se")) +- { +- printf ("call 1 returned: %s\n", s); +- result = 1; +- } ++ TEST_COMPARE_STRING (gettext ("cheese"), "K\344se"); + ++ /* Here we expect output in UTF-8. */ + bind_textdomain_codeset ("codeset", "UTF-8"); ++ TEST_COMPARE_STRING (gettext ("cheese"), "K\303\244se"); + +- /* Here we expect output in UTF-8. */ +- s = gettext ("cheese"); +- if (strcmp (s, "K\303\244se")) +- { +- printf ("call 2 returned: %s\n", s); +- result = 1; +- } +- +- return result; ++ /* `a with umlaut' is transliterated to `ae'. */ ++ bind_textdomain_codeset ("codeset", "ASCII//TRANSLIT"); ++ TEST_COMPARE_STRING (gettext ("cheese"), "Kaese"); ++ ++ /* Transliteration also works by default even if not set. 
*/ ++ bind_textdomain_codeset ("codeset", "ASCII"); ++ TEST_COMPARE_STRING (gettext ("cheese"), "Kaese"); ++ ++ return 0; + } + +-#define TEST_FUNCTION do_test () +-#include "../test-skeleton.c" ++#include <support/test-driver.c> +diff --git a/malloc/Makefile b/malloc/Makefile +index e22cbde22d..5093e8730e 100644 +--- a/malloc/Makefile ++++ b/malloc/Makefile +@@ -62,6 +62,16 @@ endif + tests += $(tests-static) + test-srcs = tst-mtrace + ++# These tests either are run with MALLOC_CHECK_=3 by default or do not work ++# with MALLOC_CHECK_=3 because they expect a specific failure. ++tests-exclude-mcheck = tst-mcheck tst-malloc-usable \ ++ tst-interpose-nothread tst-interpose-static-nothread \ ++ tst-interpose-static-thread tst-malloc-too-large \ ++ tst-mxfast tst-safe-linking ++ ++# Run all tests with MALLOC_CHECK_=3 ++tests-mcheck = $(filter-out $(tests-exclude-mcheck),$(tests)) ++ + routines = malloc morecore mcheck mtrace obstack reallocarray \ + scratch_buffer_grow scratch_buffer_grow_preserve \ + scratch_buffer_set_array_size \ +@@ -100,6 +110,11 @@ $(objpfx)tst-malloc-thread-exit: $(shared-thread-library) + $(objpfx)tst-malloc-thread-fail: $(shared-thread-library) + $(objpfx)tst-malloc-fork-deadlock: $(shared-thread-library) + $(objpfx)tst-malloc-stats-cancellation: $(shared-thread-library) ++$(objpfx)tst-malloc-backtrace-mcheck: $(shared-thread-library) ++$(objpfx)tst-malloc-thread-exit-mcheck: $(shared-thread-library) ++$(objpfx)tst-malloc-thread-fail-mcheck: $(shared-thread-library) ++$(objpfx)tst-malloc-fork-deadlock-mcheck: $(shared-thread-library) ++$(objpfx)tst-malloc-stats-cancellation-mcheck: $(shared-thread-library) + + # Export the __malloc_initialize_hook variable to libc.so. 
+ LDFLAGS-tst-mallocstate = -rdynamic +@@ -239,6 +254,8 @@ $(tests:%=$(objpfx)%.o): CPPFLAGS += -DTEST_NO_MALLOPT + $(objpfx)tst-interpose-nothread: $(objpfx)tst-interpose-aux-nothread.o + $(objpfx)tst-interpose-thread: \ + $(objpfx)tst-interpose-aux-thread.o $(shared-thread-library) ++$(objpfx)tst-interpose-thread-mcheck: \ ++ $(objpfx)tst-interpose-aux-thread.o $(shared-thread-library) + $(objpfx)tst-interpose-static-nothread: $(objpfx)tst-interpose-aux-nothread.o + $(objpfx)tst-interpose-static-thread: \ + $(objpfx)tst-interpose-aux-thread.o $(static-thread-library) +@@ -256,3 +273,6 @@ $(objpfx)tst-dynarray-fail-mem.out: $(objpfx)tst-dynarray-fail.out + $(objpfx)tst-malloc-tcache-leak: $(shared-thread-library) + $(objpfx)tst-malloc_info: $(shared-thread-library) + $(objpfx)tst-mallocfork2: $(shared-thread-library) ++$(objpfx)tst-malloc-tcache-leak-mcheck: $(shared-thread-library) ++$(objpfx)tst-malloc_info-mcheck: $(shared-thread-library) ++$(objpfx)tst-mallocfork2-mcheck: $(shared-thread-library) +diff --git a/manual/tunables.texi b/manual/tunables.texi +index 23ef0d40e7..d72d7a5ec0 100644 +--- a/manual/tunables.texi ++++ b/manual/tunables.texi +@@ -432,7 +432,11 @@ set shared cache size in bytes for use in memory and string routines. + + @deftp Tunable glibc.cpu.x86_non_temporal_threshold + The @code{glibc.cpu.x86_non_temporal_threshold} tunable allows the user +-to set threshold in bytes for non temporal store. ++to set threshold in bytes for non temporal store. Non temporal stores ++give a hint to the hardware to move data directly to memory without ++displacing other data from the cache. This tunable is used by some ++platforms to determine when to use non temporal stores in operations ++like memmove and memcpy. + + This tunable is specific to i386 and x86-64. 
+ @end deftp +diff --git a/misc/sys/cdefs.h b/misc/sys/cdefs.h +index 19d9cc5cfe..38221d0b2a 100644 +--- a/misc/sys/cdefs.h ++++ b/misc/sys/cdefs.h +@@ -124,13 +124,10 @@ + #define __bos0(ptr) __builtin_object_size (ptr, 0) + + #if __GNUC_PREREQ (4,3) +-# define __warndecl(name, msg) \ +- extern void name (void) __attribute__((__warning__ (msg))) + # define __warnattr(msg) __attribute__((__warning__ (msg))) + # define __errordecl(name, msg) \ + extern void name (void) __attribute__((__error__ (msg))) + #else +-# define __warndecl(name, msg) extern void name (void) + # define __warnattr(msg) + # define __errordecl(name, msg) extern void name (void) + #endif +diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c +index 2cba3da38c..c217cda608 100644 +--- a/nptl/pthread_create.c ++++ b/nptl/pthread_create.c +@@ -416,8 +416,6 @@ START_THREAD_DEFN + unwind_buf.priv.data.prev = NULL; + unwind_buf.priv.data.cleanup = NULL; + +- __libc_signal_restore_set (&pd->sigmask); +- + /* Allow setxid from now onwards. */ + if (__glibc_unlikely (atomic_exchange_acq (&pd->setxid_futex, 0) == -2)) + futex_wake (&pd->setxid_futex, 1, FUTEX_PRIVATE); +@@ -427,6 +425,8 @@ START_THREAD_DEFN + /* Store the new cleanup handler info. */ + THREAD_SETMEM (pd, cleanup_jmp_buf, &unwind_buf); + ++ __libc_signal_restore_set (&pd->sigmask); ++ + /* We are either in (a) or (b), and in either case we either own + PD already (2) or are about to own PD (1), and so our only + restriction would be that we can't free PD until we know we +diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c +index 88c69d1e9c..381aa721ef 100644 +--- a/nscd/netgroupcache.c ++++ b/nscd/netgroupcache.c +@@ -248,7 +248,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, + : NULL); + ndomain = (ndomain ? 
newbuf + ndomaindiff + : NULL); +- buffer = newbuf; ++ *tofreep = buffer = newbuf; + } + + nhost = memcpy (buffer + bufused, +@@ -319,7 +319,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, + else if (status == NSS_STATUS_TRYAGAIN && e == ERANGE) + { + buflen *= 2; +- buffer = xrealloc (buffer, buflen); ++ *tofreep = buffer = xrealloc (buffer, buflen); + } + else if (status == NSS_STATUS_RETURN + || status == NSS_STATUS_NOTFOUND +diff --git a/nss/tst-nss-files-hosts-long.root/etc/nsswitch.conf b/nss/tst-nss-files-hosts-long.root/etc/nsswitch.conf +new file mode 100644 +index 0000000000..5b0c6a4199 +--- /dev/null ++++ b/nss/tst-nss-files-hosts-long.root/etc/nsswitch.conf +@@ -0,0 +1 @@ ++hosts: files +diff --git a/posix/bits/unistd.h b/posix/bits/unistd.h +index 725a83eb0d..7e5bb6fb1e 100644 +--- a/posix/bits/unistd.h ++++ b/posix/bits/unistd.h +@@ -193,10 +193,9 @@ __NTH (readlinkat (int __fd, const char *__restrict __path, + #endif + + extern char *__getcwd_chk (char *__buf, size_t __size, size_t __buflen) +- __THROW __wur __attr_access ((__write_only__, 1, 2)); ++ __THROW __wur; + extern char *__REDIRECT_NTH (__getcwd_alias, +- (char *__buf, size_t __size), getcwd) +- __wur __attr_access ((__write_only__, 1, 2)); ++ (char *__buf, size_t __size), getcwd) __wur; + extern char *__REDIRECT_NTH (__getcwd_chk_warn, + (char *__buf, size_t __size, size_t __buflen), + __getcwd_chk) +diff --git a/posix/unistd.h b/posix/unistd.h +index 32b8161619..acf9ee7e79 100644 +--- a/posix/unistd.h ++++ b/posix/unistd.h +@@ -517,8 +517,7 @@ extern int fchdir (int __fd) __THROW __wur; + an array is allocated with `malloc'; the array is SIZE + bytes long, unless SIZE == 0, in which case it is as + big as necessary. 
*/ +-extern char *getcwd (char *__buf, size_t __size) __THROW __wur +- __attr_access ((__write_only__, 1, 2)); ++extern char *getcwd (char *__buf, size_t __size) __THROW __wur; + + #ifdef __USE_GNU + /* Return a malloc'd string containing the current directory name. +@@ -831,7 +830,7 @@ extern int symlinkat (const char *__from, int __tofd, + /* Like readlink but a relative PATH is interpreted relative to FD. */ + extern ssize_t readlinkat (int __fd, const char *__restrict __path, + char *__restrict __buf, size_t __len) +- __THROW __nonnull ((2, 3)) __wur __attr_access ((__read_only__, 3, 4)); ++ __THROW __nonnull ((2, 3)) __wur __attr_access ((__write_only__, 3, 4)); + #endif + + /* Remove the link NAME. */ +diff --git a/posix/wordexp-test.c b/posix/wordexp-test.c +index ed1b22308e..cb3f989cba 100644 +--- a/posix/wordexp-test.c ++++ b/posix/wordexp-test.c +@@ -183,6 +183,7 @@ struct test_case_struct + { 0, NULL, "$var", 0, 0, { NULL, }, IFS }, + { 0, NULL, "\"\\n\"", 0, 1, { "\\n", }, IFS }, + { 0, NULL, "", 0, 0, { NULL, }, IFS }, ++ { 0, NULL, "${1234567890123456789012}", 0, 0, { NULL, }, IFS }, + + /* Flags not already covered (testit() has special handling for these) */ + { 0, NULL, "one two", WRDE_DOOFFS, 2, { "one", "two", }, IFS }, +diff --git a/posix/wordexp.c b/posix/wordexp.c +index e082d94895..56289503a1 100644 +--- a/posix/wordexp.c ++++ b/posix/wordexp.c +@@ -1399,7 +1399,7 @@ envsubst: + /* Is it a numeric parameter? */ + else if (isdigit (env[0])) + { +- int n = atoi (env); ++ unsigned long n = strtoul (env, NULL, 10); + + if (n >= __libc_argc) + /* Substitute NULL. */ +diff --git a/resolv/Makefile b/resolv/Makefile +index b61c0c3e0c..dbd8f8bf4f 100644 +--- a/resolv/Makefile ++++ b/resolv/Makefile +@@ -61,6 +61,11 @@ tests += \ + tst-resolv-search \ + tst-resolv-trailing \ + ++# This test calls __res_context_send directly, which is not exported ++# from libresolv. 
++tests-internal += tst-resolv-txnid-collision ++tests-static += tst-resolv-txnid-collision ++ + # These tests need libdl. + ifeq (yes,$(build-shared)) + tests += \ +@@ -191,6 +196,8 @@ $(objpfx)tst-resolv-search: $(objpfx)libresolv.so $(shared-thread-library) + $(objpfx)tst-resolv-trailing: $(objpfx)libresolv.so $(shared-thread-library) + $(objpfx)tst-resolv-threads: \ + $(libdl) $(objpfx)libresolv.so $(shared-thread-library) ++$(objpfx)tst-resolv-txnid-collision: $(objpfx)libresolv.a \ ++ $(static-thread-library) + $(objpfx)tst-resolv-canonname: \ + $(libdl) $(objpfx)libresolv.so $(shared-thread-library) + $(objpfx)tst-resolv-trustad: $(objpfx)libresolv.so $(shared-thread-library) +diff --git a/resolv/res_send.c b/resolv/res_send.c +index 7e5fec6646..70e5066031 100644 +--- a/resolv/res_send.c ++++ b/resolv/res_send.c +@@ -1342,15 +1342,6 @@ send_dg(res_state statp, + *terrno = EMSGSIZE; + return close_and_return_error (statp, resplen2); + } +- if ((recvresp1 || hp->id != anhp->id) +- && (recvresp2 || hp2->id != anhp->id)) { +- /* +- * response from old query, ignore it. +- * XXX - potential security hazard could +- * be detected here. +- */ +- goto wait; +- } + + /* Paranoia check. Due to the connected UDP socket, + the kernel has already filtered invalid addresses +@@ -1360,15 +1351,24 @@ send_dg(res_state statp, + + /* Check for the correct header layout and a matching + question. */ +- if ((recvresp1 || !res_queriesmatch(buf, buf + buflen, +- *thisansp, +- *thisansp +- + *thisanssizp)) +- && (recvresp2 || !res_queriesmatch(buf2, buf2 + buflen2, +- *thisansp, +- *thisansp +- + *thisanssizp))) +- goto wait; ++ int matching_query = 0; /* Default to no matching query. 
*/ ++ if (!recvresp1 ++ && anhp->id == hp->id ++ && res_queriesmatch (buf, buf + buflen, ++ *thisansp, *thisansp + *thisanssizp)) ++ matching_query = 1; ++ if (!recvresp2 ++ && anhp->id == hp2->id ++ && res_queriesmatch (buf2, buf2 + buflen2, ++ *thisansp, *thisansp + *thisanssizp)) ++ matching_query = 2; ++ if (matching_query == 0) ++ /* Spurious UDP packet. Drop it and continue ++ waiting. */ ++ { ++ need_recompute = 1; ++ goto wait; ++ } + + if (anhp->rcode == SERVFAIL || + anhp->rcode == NOTIMP || +@@ -1383,7 +1383,7 @@ send_dg(res_state statp, + /* No data from the first reply. */ + resplen = 0; + /* We are waiting for a possible second reply. */ +- if (hp->id == anhp->id) ++ if (matching_query == 1) + recvresp1 = 1; + else + recvresp2 = 1; +@@ -1414,7 +1414,7 @@ send_dg(res_state statp, + return (1); + } + /* Mark which reply we received. */ +- if (recvresp1 == 0 && hp->id == anhp->id) ++ if (matching_query == 1) + recvresp1 = 1; + else + recvresp2 = 1; +diff --git a/resolv/tst-resolv-txnid-collision.c b/resolv/tst-resolv-txnid-collision.c +new file mode 100644 +index 0000000000..189b76f126 +--- /dev/null ++++ b/resolv/tst-resolv-txnid-collision.c +@@ -0,0 +1,334 @@ ++/* Test parallel queries with transaction ID collisions. ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <arpa/nameser.h> ++#include <array_length.h> ++#include <resolv-internal.h> ++#include <resolv_context.h> ++#include <stdbool.h> ++#include <stdio.h> ++#include <string.h> ++#include <support/check.h> ++#include <support/check_nss.h> ++#include <support/resolv_test.h> ++#include <support/support.h> ++#include <support/test-driver.h> ++ ++/* Result of parsing a DNS question name. ++ ++ A question name has the form reorder-N-M-rcode-C.example.net, where ++ N and M are either 0 and 1, corresponding to the reorder member, ++ and C is a number that will be stored in the rcode field. ++ ++ Also see parse_qname below. */ ++struct parsed_qname ++{ ++ /* The DNS response code requested from the first server. The ++ second server always responds with RCODE zero. */ ++ int rcode; ++ ++ /* Indicates whether to perform reordering in the responses from the ++ respective server. */ ++ bool reorder[2]; ++}; ++ ++/* Fills *PARSED based on QNAME. */ ++static void ++parse_qname (struct parsed_qname *parsed, const char *qname) ++{ ++ int reorder0; ++ int reorder1; ++ int rcode; ++ char *suffix; ++ if (sscanf (qname, "reorder-%d-%d.rcode-%d.%ms", ++ &reorder0, &reorder1, &rcode, &suffix) == 4) ++ { ++ if (reorder0 != 0) ++ TEST_COMPARE (reorder0, 1); ++ if (reorder1 != 0) ++ TEST_COMPARE (reorder1, 1); ++ TEST_VERIFY (rcode >= 0 && rcode <= 15); ++ TEST_COMPARE_STRING (suffix, "example.net"); ++ free (suffix); ++ ++ parsed->rcode = rcode; ++ parsed->reorder[0] = reorder0; ++ parsed->reorder[1] = reorder1; ++ } ++ else ++ FAIL_EXIT1 ("unexpected query: %s", qname); ++} ++ ++/* Used to construct a response. The first server responds with an ++ error, the second server succeeds. 
*/ ++static void ++build_response (const struct resolv_response_context *ctx, ++ struct resolv_response_builder *b, ++ const char *qname, uint16_t qclass, uint16_t qtype) ++{ ++ struct parsed_qname parsed; ++ parse_qname (&parsed, qname); ++ ++ switch (ctx->server_index) ++ { ++ case 0: ++ { ++ struct resolv_response_flags flags = { 0 }; ++ if (parsed.rcode == 0) ++ /* Simulate a delegation in case a NODATA (RCODE zero) ++ response is requested. */ ++ flags.clear_ra = true; ++ else ++ flags.rcode = parsed.rcode; ++ ++ resolv_response_init (b, flags); ++ resolv_response_add_question (b, qname, qclass, qtype); ++ } ++ break; ++ ++ case 1: ++ { ++ struct resolv_response_flags flags = { 0, }; ++ resolv_response_init (b, flags); ++ resolv_response_add_question (b, qname, qclass, qtype); ++ ++ resolv_response_section (b, ns_s_an); ++ resolv_response_open_record (b, qname, qclass, qtype, 0); ++ if (qtype == T_A) ++ { ++ char ipv4[4] = { 192, 0, 2, 1 }; ++ resolv_response_add_data (b, &ipv4, sizeof (ipv4)); ++ } ++ else ++ { ++ char ipv6[16] ++ = { 0x20, 0x01, 0xd, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; ++ resolv_response_add_data (b, &ipv6, sizeof (ipv6)); ++ } ++ resolv_response_close_record (b); ++ } ++ break; ++ } ++} ++ ++/* Used to reorder responses. */ ++struct resolv_response_context *previous_query; ++ ++/* Used to keep track of the queries received. */ ++static int previous_server_index = -1; ++static uint16_t previous_qtype; ++ ++/* For each server, buffer the first query and then send both answers ++ to the second query, reordered if requested. 
*/ ++static void ++response (const struct resolv_response_context *ctx, ++ struct resolv_response_builder *b, ++ const char *qname, uint16_t qclass, uint16_t qtype) ++{ ++ TEST_VERIFY (qtype == T_A || qtype == T_AAAA); ++ if (ctx->server_index != 0) ++ TEST_COMPARE (ctx->server_index, 1); ++ ++ struct parsed_qname parsed; ++ parse_qname (&parsed, qname); ++ ++ if (previous_query == NULL) ++ { ++ /* No buffered query. Record this query and do not send a ++ response. */ ++ TEST_COMPARE (previous_qtype, 0); ++ previous_query = resolv_response_context_duplicate (ctx); ++ previous_qtype = qtype; ++ resolv_response_drop (b); ++ previous_server_index = ctx->server_index; ++ ++ if (test_verbose) ++ printf ("info: buffering first query for: %s\n", qname); ++ } ++ else ++ { ++ TEST_VERIFY (previous_query != 0); ++ TEST_COMPARE (ctx->server_index, previous_server_index); ++ TEST_VERIFY (previous_qtype != qtype); /* Not a duplicate. */ ++ ++ /* If reordering, send a response for this query explicitly, and ++ then skip the implicit send. */ ++ if (parsed.reorder[ctx->server_index]) ++ { ++ if (test_verbose) ++ printf ("info: sending reordered second response for: %s\n", ++ qname); ++ build_response (ctx, b, qname, qclass, qtype); ++ resolv_response_send_udp (ctx, b); ++ resolv_response_drop (b); ++ } ++ ++ /* Build a response for the previous query and send it, thus ++ reordering the two responses. */ ++ { ++ if (test_verbose) ++ printf ("info: sending first response for: %s\n", qname); ++ struct resolv_response_builder *btmp ++ = resolv_response_builder_allocate (previous_query->query_buffer, ++ previous_query->query_length); ++ build_response (ctx, btmp, qname, qclass, previous_qtype); ++ resolv_response_send_udp (ctx, btmp); ++ resolv_response_builder_free (btmp); ++ } ++ ++ /* If not reordering, send the reply as usual. 
*/ ++ if (!parsed.reorder[ctx->server_index]) ++ { ++ if (test_verbose) ++ printf ("info: sending non-reordered second response for: %s\n", ++ qname); ++ build_response (ctx, b, qname, qclass, qtype); ++ } ++ ++ /* Unbuffer the response and prepare for the next query. */ ++ resolv_response_context_free (previous_query); ++ previous_query = NULL; ++ previous_qtype = 0; ++ previous_server_index = -1; ++ } ++} ++ ++/* Runs a query for QNAME and checks for the expected reply. See ++ struct parsed_qname for the expected format for QNAME. */ ++static void ++test_qname (const char *qname, int rcode) ++{ ++ struct resolv_context *ctx = __resolv_context_get (); ++ TEST_VERIFY_EXIT (ctx != NULL); ++ ++ unsigned char q1[512]; ++ int q1len = res_mkquery (QUERY, qname, C_IN, T_A, NULL, 0, NULL, ++ q1, sizeof (q1)); ++ TEST_VERIFY_EXIT (q1len > 12); ++ ++ unsigned char q2[512]; ++ int q2len = res_mkquery (QUERY, qname, C_IN, T_AAAA, NULL, 0, NULL, ++ q2, sizeof (q2)); ++ TEST_VERIFY_EXIT (q2len > 12); ++ ++ /* Produce a transaction ID collision. */ ++ memcpy (q2, q1, 2); ++ ++ unsigned char ans1[512]; ++ unsigned char *ans1p = ans1; ++ unsigned char *ans2p = NULL; ++ int nans2p = 0; ++ int resplen2 = 0; ++ int ans2p_malloced = 0; ++ ++ /* Perform a parallel A/AAAA query. */ ++ int resplen1 = __res_context_send (ctx, q1, q1len, q2, q2len, ++ ans1, sizeof (ans1), &ans1p, ++ &ans2p, &nans2p, ++ &resplen2, &ans2p_malloced); ++ ++ TEST_VERIFY (resplen1 > 12); ++ TEST_VERIFY (resplen2 > 12); ++ if (resplen1 <= 12 || resplen2 <= 12) ++ return; ++ ++ if (rcode == 1 || rcode == 3) ++ { ++ /* Format Error and Name Error responses does not trigger ++ switching to the next server. */ ++ TEST_COMPARE (ans1p[3] & 0x0f, rcode); ++ TEST_COMPARE (ans2p[3] & 0x0f, rcode); ++ return; ++ } ++ ++ /* The response should be successful. */ ++ TEST_COMPARE (ans1p[3] & 0x0f, 0); ++ TEST_COMPARE (ans2p[3] & 0x0f, 0); ++ ++ /* Due to bug 19691, the answer may not be in the slot matching the ++ query. 
Assume that the AAAA response is the longer one. */ ++ unsigned char *a_answer; ++ int a_answer_length; ++ unsigned char *aaaa_answer; ++ int aaaa_answer_length; ++ if (resplen2 > resplen1) ++ { ++ a_answer = ans1p; ++ a_answer_length = resplen1; ++ aaaa_answer = ans2p; ++ aaaa_answer_length = resplen2; ++ } ++ else ++ { ++ a_answer = ans2p; ++ a_answer_length = resplen2; ++ aaaa_answer = ans1p; ++ aaaa_answer_length = resplen1; ++ } ++ ++ { ++ char *expected = xasprintf ("name: %s\n" ++ "address: 192.0.2.1\n", ++ qname); ++ check_dns_packet (qname, a_answer, a_answer_length, expected); ++ free (expected); ++ } ++ { ++ char *expected = xasprintf ("name: %s\n" ++ "address: 2001:db8::1\n", ++ qname); ++ check_dns_packet (qname, aaaa_answer, aaaa_answer_length, expected); ++ free (expected); ++ } ++ ++ if (ans2p_malloced) ++ free (ans2p); ++ ++ __resolv_context_put (ctx); ++} ++ ++static int ++do_test (void) ++{ ++ struct resolv_test *aux = resolv_test_start ++ ((struct resolv_redirect_config) ++ { ++ .response_callback = response, ++ ++ /* The response callback use global state (the previous_* ++ variables), and query processing must therefore be ++ serialized. 
*/ ++ .single_thread_udp = true, ++ }); ++ ++ for (int rcode = 0; rcode <= 5; ++rcode) ++ for (int do_reorder_0 = 0; do_reorder_0 < 2; ++do_reorder_0) ++ for (int do_reorder_1 = 0; do_reorder_1 < 2; ++do_reorder_1) ++ { ++ char *qname = xasprintf ("reorder-%d-%d.rcode-%d.example.net", ++ do_reorder_0, do_reorder_1, rcode); ++ test_qname (qname, rcode); ++ free (qname); ++ } ++ ++ resolv_test_end (aux); ++ ++ return 0; ++} ++ ++#include <support/test-driver.c> +diff --git a/rt/Makefile b/rt/Makefile +index dab5d62a57..93502cfaa7 100644 +--- a/rt/Makefile ++++ b/rt/Makefile +@@ -44,6 +44,7 @@ tests := tst-shm tst-timer tst-timer2 \ + tst-aio7 tst-aio8 tst-aio9 tst-aio10 \ + tst-mqueue1 tst-mqueue2 tst-mqueue3 tst-mqueue4 \ + tst-mqueue5 tst-mqueue6 tst-mqueue7 tst-mqueue8 tst-mqueue9 \ ++ tst-bz28213 \ + tst-timer3 tst-timer4 tst-timer5 \ + tst-cpuclock2 tst-cputimer1 tst-cputimer2 tst-cputimer3 \ + tst-shm-cancel +diff --git a/rt/tst-bz28213.c b/rt/tst-bz28213.c +new file mode 100644 +index 0000000000..0c096b5a0a +--- /dev/null ++++ b/rt/tst-bz28213.c +@@ -0,0 +1,101 @@ ++/* Bug 28213: test for NULL pointer dereference in mq_notify. ++ Copyright (C) The GNU Toolchain Authors. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <errno.h> ++#include <sys/types.h> ++#include <sys/stat.h> ++#include <fcntl.h> ++#include <unistd.h> ++#include <mqueue.h> ++#include <signal.h> ++#include <stdlib.h> ++#include <string.h> ++#include <support/check.h> ++ ++static mqd_t m = -1; ++static const char msg[] = "hello"; ++ ++static void ++check_bz28213_cb (union sigval sv) ++{ ++ char buf[sizeof (msg)]; ++ ++ (void) sv; ++ ++ TEST_VERIFY_EXIT ((size_t) mq_receive (m, buf, sizeof (buf), NULL) ++ == sizeof (buf)); ++ TEST_VERIFY_EXIT (memcmp (buf, msg, sizeof (buf)) == 0); ++ ++ exit (0); ++} ++ ++static void ++check_bz28213 (void) ++{ ++ struct sigevent sev; ++ ++ memset (&sev, '\0', sizeof (sev)); ++ sev.sigev_notify = SIGEV_THREAD; ++ sev.sigev_notify_function = check_bz28213_cb; ++ ++ /* Step 1: Register & unregister notifier. ++ Helper thread should receive NOTIFY_REMOVED notification. ++ In a vulnerable version of glibc, NULL pointer dereference follows. */ ++ TEST_VERIFY_EXIT (mq_notify (m, &sev) == 0); ++ TEST_VERIFY_EXIT (mq_notify (m, NULL) == 0); ++ ++ /* Step 2: Once again, register notification. ++ Try to send one message. ++ Test is considered successful, if the callback does exit (0). */ ++ TEST_VERIFY_EXIT (mq_notify (m, &sev) == 0); ++ TEST_VERIFY_EXIT (mq_send (m, msg, sizeof (msg), 1) == 0); ++ ++ /* Wait... 
*/ ++ pause (); ++} ++ ++static int ++do_test (void) ++{ ++ static const char m_name[] = "/bz28213_queue"; ++ struct mq_attr m_attr; ++ ++ memset (&m_attr, '\0', sizeof (m_attr)); ++ m_attr.mq_maxmsg = 1; ++ m_attr.mq_msgsize = sizeof (msg); ++ ++ m = mq_open (m_name, ++ O_RDWR | O_CREAT | O_EXCL, ++ 0600, ++ &m_attr); ++ ++ if (m < 0) ++ { ++ if (errno == ENOSYS) ++ FAIL_UNSUPPORTED ("POSIX message queues are not implemented\n"); ++ FAIL_EXIT1 ("Failed to create POSIX message queue: %m\n"); ++ } ++ ++ TEST_VERIFY_EXIT (mq_unlink (m_name) == 0); ++ ++ check_bz28213 (); ++ ++ return 0; ++} ++ ++#include <support/test-driver.c> +diff --git a/stdio-common/Makefile b/stdio-common/Makefile +index 8475fd1f09..eff0c98d82 100644 +--- a/stdio-common/Makefile ++++ b/stdio-common/Makefile +@@ -69,7 +69,8 @@ tests := tstscanf test_rdwr test-popen tstgetln test-fseek \ + tst-printf-bz25691 \ + tst-vfprintf-width-prec-alloc \ + tst-printf-fp-free \ +- tst-printf-fp-leak ++ tst-printf-fp-leak \ ++ test-strerr + + + test-srcs = tst-unbputc tst-printf tst-printfsz-islongdouble +diff --git a/stdio-common/errlist.c b/stdio-common/errlist.c +index d15f13a22a..2ecf121674 100644 +--- a/stdio-common/errlist.c ++++ b/stdio-common/errlist.c +@@ -20,9 +20,13 @@ + #include <libintl.h> + #include <array_length.h> + ++#ifndef ERR_MAP ++# define ERR_MAP(n) n ++#endif ++ + const char *const _sys_errlist_internal[] = + { +-#define _S(n, str) [n] = str, ++#define _S(n, str) [ERR_MAP(n)] = str, + #include <errlist.h> + #undef _S + }; +@@ -41,20 +45,21 @@ static const union sys_errname_t + { + #define MSGSTRFIELD1(line) str##line + #define MSGSTRFIELD(line) MSGSTRFIELD1(line) +-#define _S(n, str) char MSGSTRFIELD(__LINE__)[sizeof(str)]; ++#define _S(n, str) char MSGSTRFIELD(__LINE__)[sizeof(#n)]; + #include <errlist.h> + #undef _S + }; + char str[0]; + } _sys_errname = { { +-#define _S(n, s) s, ++#define _S(n, s) #n, + #include <errlist.h> + #undef _S + } }; + + static const unsigned short 
_sys_errnameidx[] = + { +-#define _S(n, s) [n] = offsetof(union sys_errname_t, MSGSTRFIELD(__LINE__)), ++#define _S(n, s) \ ++ [ERR_MAP(n)] = offsetof(union sys_errname_t, MSGSTRFIELD(__LINE__)), + #include <errlist.h> + #undef _S + }; +diff --git a/stdio-common/test-strerr.c b/stdio-common/test-strerr.c +index fded208118..d77b81d507 100644 +--- a/stdio-common/test-strerr.c ++++ b/stdio-common/test-strerr.c +@@ -18,46 +18,672 @@ + + #include <string.h> + #include <errno.h> +-#include <array_length.h> + + #include <support/support.h> + #include <support/check.h> + +-#define N_(name) name +- +-static const char *const errlist[] = +- { +-/* This file is auto-generated from errlist.def. */ +-#include <errlist.h> +- }; +- +-#define MSGSTR_T errname_t +-#define MSGSTR errname +-#define MSGIDX errnameidx +-#include <errlist-name.h> +-#undef MSGSTR +-#undef MSGIDX +- + static int + do_test (void) + { +- TEST_VERIFY (strerrordesc_np (-1) == NULL); +- TEST_VERIFY (strerrordesc_np (array_length (errlist)) == NULL); +- for (size_t i = 0; i < array_length (errlist); i++) +- { +- if (errlist[i] == NULL) +- continue; +- TEST_COMPARE_STRING (strerrordesc_np (i), errlist[i]); +- } ++ TEST_COMPARE_STRING (strerrordesc_np (0), "Success"); ++ TEST_COMPARE_STRING (strerrorname_np (0), "0"); + +- TEST_VERIFY (strerrorname_np (-1) == NULL); +- TEST_VERIFY (strerrorname_np (array_length (errlist)) == NULL); +- for (size_t i = 0; i < array_length (errlist); i++) +- { +- if (errlist[i] == NULL) +- continue; +- TEST_COMPARE_STRING (strerrorname_np (i), errname.str + errnameidx[i]); +- } ++#ifdef EPERM ++ TEST_COMPARE_STRING (strerrordesc_np (EPERM), "Operation not permitted"); ++ TEST_COMPARE_STRING (strerrorname_np (EPERM), "EPERM"); ++#endif ++#ifdef ENOENT ++ TEST_COMPARE_STRING (strerrordesc_np (ENOENT), ++ "No such file or directory"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOENT), "ENOENT"); ++#endif ++#ifdef ESRCH ++ TEST_COMPARE_STRING (strerrordesc_np (ESRCH), "No such process"); 
++ TEST_COMPARE_STRING (strerrorname_np (ESRCH), "ESRCH"); ++#endif ++#ifdef EINTR ++ TEST_COMPARE_STRING (strerrordesc_np (EINTR), "Interrupted system call"); ++ TEST_COMPARE_STRING (strerrorname_np (EINTR), "EINTR"); ++#endif ++#ifdef EIO ++ TEST_COMPARE_STRING (strerrordesc_np (EIO), "Input/output error"); ++ TEST_COMPARE_STRING (strerrorname_np (EIO), "EIO"); ++#endif ++#ifdef ENXIO ++ TEST_COMPARE_STRING (strerrordesc_np (ENXIO), "No such device or address"); ++ TEST_COMPARE_STRING (strerrorname_np (ENXIO), "ENXIO"); ++#endif ++#ifdef E2BIG ++ TEST_COMPARE_STRING (strerrordesc_np (E2BIG), "Argument list too long"); ++ TEST_COMPARE_STRING (strerrorname_np (E2BIG), "E2BIG"); ++#endif ++#ifdef ENOEXEC ++ TEST_COMPARE_STRING (strerrordesc_np (ENOEXEC), "Exec format error"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOEXEC), "ENOEXEC"); ++#endif ++#ifdef EBADF ++ TEST_COMPARE_STRING (strerrordesc_np (EBADF), "Bad file descriptor"); ++ TEST_COMPARE_STRING (strerrorname_np (EBADF), "EBADF"); ++#endif ++#ifdef ECHILD ++ TEST_COMPARE_STRING (strerrordesc_np (ECHILD), "No child processes"); ++ TEST_COMPARE_STRING (strerrorname_np (ECHILD), "ECHILD"); ++#endif ++#ifdef EDEADLK ++ TEST_COMPARE_STRING (strerrordesc_np (EDEADLK), ++ "Resource deadlock avoided"); ++ TEST_COMPARE_STRING (strerrorname_np (EDEADLK), "EDEADLK"); ++#endif ++#ifdef ENOMEM ++ TEST_COMPARE_STRING (strerrordesc_np (ENOMEM), "Cannot allocate memory"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOMEM), "ENOMEM"); ++#endif ++#ifdef EACCES ++ TEST_COMPARE_STRING (strerrordesc_np (EACCES), "Permission denied"); ++ TEST_COMPARE_STRING (strerrorname_np (EACCES), "EACCES"); ++#endif ++#ifdef EFAULT ++ TEST_COMPARE_STRING (strerrordesc_np (EFAULT), "Bad address"); ++ TEST_COMPARE_STRING (strerrorname_np (EFAULT), "EFAULT"); ++#endif ++#ifdef ENOTBLK ++ TEST_COMPARE_STRING (strerrordesc_np (ENOTBLK), "Block device required"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOTBLK), "ENOTBLK"); ++#endif ++#ifdef EBUSY 
++ TEST_COMPARE_STRING (strerrordesc_np (EBUSY), "Device or resource busy"); ++ TEST_COMPARE_STRING (strerrorname_np (EBUSY), "EBUSY"); ++#endif ++#ifdef EEXIST ++ TEST_COMPARE_STRING (strerrordesc_np (EEXIST), "File exists"); ++ TEST_COMPARE_STRING (strerrorname_np (EEXIST), "EEXIST"); ++#endif ++#ifdef EXDEV ++ TEST_COMPARE_STRING (strerrordesc_np (EXDEV), "Invalid cross-device link"); ++ TEST_COMPARE_STRING (strerrorname_np (EXDEV), "EXDEV"); ++#endif ++#ifdef ENODEV ++ TEST_COMPARE_STRING (strerrordesc_np (ENODEV), "No such device"); ++ TEST_COMPARE_STRING (strerrorname_np (ENODEV), "ENODEV"); ++#endif ++#ifdef ENOTDIR ++ TEST_COMPARE_STRING (strerrordesc_np (ENOTDIR), "Not a directory"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOTDIR), "ENOTDIR"); ++#endif ++#ifdef EISDIR ++ TEST_COMPARE_STRING (strerrordesc_np (EISDIR), "Is a directory"); ++ TEST_COMPARE_STRING (strerrorname_np (EISDIR), "EISDIR"); ++#endif ++#ifdef EINVAL ++ TEST_COMPARE_STRING (strerrordesc_np (EINVAL), "Invalid argument"); ++ TEST_COMPARE_STRING (strerrorname_np (EINVAL), "EINVAL"); ++#endif ++#ifdef EMFILE ++ TEST_COMPARE_STRING (strerrordesc_np (EMFILE), "Too many open files"); ++ TEST_COMPARE_STRING (strerrorname_np (EMFILE), "EMFILE"); ++#endif ++#ifdef ENFILE ++ TEST_COMPARE_STRING (strerrordesc_np (ENFILE), ++ "Too many open files in system"); ++ TEST_COMPARE_STRING (strerrorname_np (ENFILE), "ENFILE"); ++#endif ++#ifdef ENOTTY ++ TEST_COMPARE_STRING (strerrordesc_np (ENOTTY), ++ "Inappropriate ioctl for device"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOTTY), "ENOTTY"); ++#endif ++#ifdef ETXTBSY ++ TEST_COMPARE_STRING (strerrordesc_np (ETXTBSY), "Text file busy"); ++ TEST_COMPARE_STRING (strerrorname_np (ETXTBSY), "ETXTBSY"); ++#endif ++#ifdef EFBIG ++ TEST_COMPARE_STRING (strerrordesc_np (EFBIG), "File too large"); ++ TEST_COMPARE_STRING (strerrorname_np (EFBIG), "EFBIG"); ++#endif ++#ifdef ENOSPC ++ TEST_COMPARE_STRING (strerrordesc_np (ENOSPC), "No space left on device"); ++ 
TEST_COMPARE_STRING (strerrorname_np (ENOSPC), "ENOSPC"); ++#endif ++#ifdef ESPIPE ++ TEST_COMPARE_STRING (strerrordesc_np (ESPIPE), "Illegal seek"); ++ TEST_COMPARE_STRING (strerrorname_np (ESPIPE), "ESPIPE"); ++#endif ++#ifdef EROFS ++ TEST_COMPARE_STRING (strerrordesc_np (EROFS), "Read-only file system"); ++ TEST_COMPARE_STRING (strerrorname_np (EROFS), "EROFS"); ++#endif ++#ifdef EMLINK ++ TEST_COMPARE_STRING (strerrordesc_np (EMLINK), "Too many links"); ++ TEST_COMPARE_STRING (strerrorname_np (EMLINK), "EMLINK"); ++#endif ++#ifdef EPIPE ++ TEST_COMPARE_STRING (strerrordesc_np (EPIPE), "Broken pipe"); ++ TEST_COMPARE_STRING (strerrorname_np (EPIPE), "EPIPE"); ++#endif ++#ifdef EDOM ++ TEST_COMPARE_STRING (strerrordesc_np (EDOM), ++ "Numerical argument out of domain"); ++ TEST_COMPARE_STRING (strerrorname_np (EDOM), "EDOM"); ++#endif ++#ifdef ERANGE ++ TEST_COMPARE_STRING (strerrordesc_np (ERANGE), ++ "Numerical result out of range"); ++ TEST_COMPARE_STRING (strerrorname_np (ERANGE), "ERANGE"); ++#endif ++#ifdef EAGAIN ++ TEST_COMPARE_STRING (strerrordesc_np (EAGAIN), ++ "Resource temporarily unavailable"); ++ TEST_COMPARE_STRING (strerrorname_np (EAGAIN), "EAGAIN"); ++#endif ++#ifdef EINPROGRESS ++ TEST_COMPARE_STRING (strerrordesc_np (EINPROGRESS), ++ "Operation now in progress"); ++ TEST_COMPARE_STRING (strerrorname_np (EINPROGRESS), "EINPROGRESS"); ++#endif ++#ifdef EALREADY ++ TEST_COMPARE_STRING (strerrordesc_np (EALREADY), ++ "Operation already in progress"); ++ TEST_COMPARE_STRING (strerrorname_np (EALREADY), "EALREADY"); ++#endif ++#ifdef ENOTSOCK ++ TEST_COMPARE_STRING (strerrordesc_np (ENOTSOCK), ++ "Socket operation on non-socket"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOTSOCK), "ENOTSOCK"); ++#endif ++#ifdef EMSGSIZE ++ TEST_COMPARE_STRING (strerrordesc_np (EMSGSIZE), "Message too long"); ++ TEST_COMPARE_STRING (strerrorname_np (EMSGSIZE), "EMSGSIZE"); ++#endif ++#ifdef EPROTOTYPE ++ TEST_COMPARE_STRING (strerrordesc_np (EPROTOTYPE), ++ 
"Protocol wrong type for socket"); ++ TEST_COMPARE_STRING (strerrorname_np (EPROTOTYPE), "EPROTOTYPE"); ++#endif ++#ifdef ENOPROTOOPT ++ TEST_COMPARE_STRING (strerrordesc_np (ENOPROTOOPT), ++ "Protocol not available"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOPROTOOPT), "ENOPROTOOPT"); ++#endif ++#ifdef EPROTONOSUPPORT ++ TEST_COMPARE_STRING (strerrordesc_np (EPROTONOSUPPORT), ++ "Protocol not supported"); ++ TEST_COMPARE_STRING (strerrorname_np (EPROTONOSUPPORT), "EPROTONOSUPPORT"); ++#endif ++#ifdef ESOCKTNOSUPPORT ++ TEST_COMPARE_STRING (strerrordesc_np (ESOCKTNOSUPPORT), ++ "Socket type not supported"); ++ TEST_COMPARE_STRING (strerrorname_np (ESOCKTNOSUPPORT), "ESOCKTNOSUPPORT"); ++#endif ++#ifdef EOPNOTSUPP ++ TEST_COMPARE_STRING (strerrordesc_np (EOPNOTSUPP), ++ "Operation not supported"); ++ TEST_COMPARE_STRING (strerrorname_np (EOPNOTSUPP), "EOPNOTSUPP"); ++#endif ++#ifdef EPFNOSUPPORT ++ TEST_COMPARE_STRING (strerrordesc_np (EPFNOSUPPORT), ++ "Protocol family not supported"); ++ TEST_COMPARE_STRING (strerrorname_np (EPFNOSUPPORT), "EPFNOSUPPORT"); ++#endif ++#ifdef EAFNOSUPPORT ++ TEST_COMPARE_STRING (strerrordesc_np (EAFNOSUPPORT), ++ "Address family not supported by protocol"); ++ TEST_COMPARE_STRING (strerrorname_np (EAFNOSUPPORT), "EAFNOSUPPORT"); ++#endif ++#ifdef EADDRINUSE ++ TEST_COMPARE_STRING (strerrordesc_np (EADDRINUSE), ++ "Address already in use"); ++ TEST_COMPARE_STRING (strerrorname_np (EADDRINUSE), "EADDRINUSE"); ++#endif ++#ifdef EADDRNOTAVAIL ++ TEST_COMPARE_STRING (strerrordesc_np (EADDRNOTAVAIL), ++ "Cannot assign requested address"); ++ TEST_COMPARE_STRING (strerrorname_np (EADDRNOTAVAIL), "EADDRNOTAVAIL"); ++#endif ++#ifdef ENETDOWN ++ TEST_COMPARE_STRING (strerrordesc_np (ENETDOWN), "Network is down"); ++ TEST_COMPARE_STRING (strerrorname_np (ENETDOWN), "ENETDOWN"); ++#endif ++#ifdef ENETUNREACH ++ TEST_COMPARE_STRING (strerrordesc_np (ENETUNREACH), ++ "Network is unreachable"); ++ TEST_COMPARE_STRING (strerrorname_np 
(ENETUNREACH), "ENETUNREACH"); ++#endif ++#ifdef ENETRESET ++ TEST_COMPARE_STRING (strerrordesc_np (ENETRESET), ++ "Network dropped connection on reset"); ++ TEST_COMPARE_STRING (strerrorname_np (ENETRESET), "ENETRESET"); ++#endif ++#ifdef ECONNABORTED ++ TEST_COMPARE_STRING (strerrordesc_np (ECONNABORTED), ++ "Software caused connection abort"); ++ TEST_COMPARE_STRING (strerrorname_np (ECONNABORTED), "ECONNABORTED"); ++#endif ++#ifdef ECONNRESET ++ TEST_COMPARE_STRING (strerrordesc_np (ECONNRESET), ++ "Connection reset by peer"); ++ TEST_COMPARE_STRING (strerrorname_np (ECONNRESET), "ECONNRESET"); ++#endif ++#ifdef ENOBUFS ++ TEST_COMPARE_STRING (strerrordesc_np (ENOBUFS), ++ "No buffer space available"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOBUFS), "ENOBUFS"); ++#endif ++#ifdef EISCONN ++ TEST_COMPARE_STRING (strerrordesc_np (EISCONN), ++ "Transport endpoint is already connected"); ++ TEST_COMPARE_STRING (strerrorname_np (EISCONN), "EISCONN"); ++#endif ++#ifdef ENOTCONN ++ TEST_COMPARE_STRING (strerrordesc_np (ENOTCONN), ++ "Transport endpoint is not connected"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOTCONN), "ENOTCONN"); ++#endif ++#ifdef EDESTADDRREQ ++ TEST_COMPARE_STRING (strerrordesc_np (EDESTADDRREQ), ++ "Destination address required"); ++ TEST_COMPARE_STRING (strerrorname_np (EDESTADDRREQ), "EDESTADDRREQ"); ++#endif ++#ifdef ESHUTDOWN ++ TEST_COMPARE_STRING (strerrordesc_np (ESHUTDOWN), ++ "Cannot send after transport endpoint shutdown"); ++ TEST_COMPARE_STRING (strerrorname_np (ESHUTDOWN), "ESHUTDOWN"); ++#endif ++#ifdef ETOOMANYREFS ++ TEST_COMPARE_STRING (strerrordesc_np (ETOOMANYREFS), ++ "Too many references: cannot splice"); ++ TEST_COMPARE_STRING (strerrorname_np (ETOOMANYREFS), "ETOOMANYREFS"); ++#endif ++#ifdef ETIMEDOUT ++ TEST_COMPARE_STRING (strerrordesc_np (ETIMEDOUT), "Connection timed out"); ++ TEST_COMPARE_STRING (strerrorname_np (ETIMEDOUT), "ETIMEDOUT"); ++#endif ++#ifdef ECONNREFUSED ++ TEST_COMPARE_STRING (strerrordesc_np 
(ECONNREFUSED), "Connection refused"); ++ TEST_COMPARE_STRING (strerrorname_np (ECONNREFUSED), "ECONNREFUSED"); ++#endif ++#ifdef ELOOP ++ TEST_COMPARE_STRING (strerrordesc_np (ELOOP), ++ "Too many levels of symbolic links"); ++ TEST_COMPARE_STRING (strerrorname_np (ELOOP), "ELOOP"); ++#endif ++#ifdef ENAMETOOLONG ++ TEST_COMPARE_STRING (strerrordesc_np (ENAMETOOLONG), "File name too long"); ++ TEST_COMPARE_STRING (strerrorname_np (ENAMETOOLONG), "ENAMETOOLONG"); ++#endif ++#ifdef EHOSTDOWN ++ TEST_COMPARE_STRING (strerrordesc_np (EHOSTDOWN), "Host is down"); ++ TEST_COMPARE_STRING (strerrorname_np (EHOSTDOWN), "EHOSTDOWN"); ++#endif ++#ifdef EHOSTUNREACH ++ TEST_COMPARE_STRING (strerrordesc_np (EHOSTUNREACH), "No route to host"); ++ TEST_COMPARE_STRING (strerrorname_np (EHOSTUNREACH), "EHOSTUNREACH"); ++#endif ++#ifdef ENOTEMPTY ++ TEST_COMPARE_STRING (strerrordesc_np (ENOTEMPTY), "Directory not empty"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOTEMPTY), "ENOTEMPTY"); ++#endif ++#ifdef EUSERS ++ TEST_COMPARE_STRING (strerrordesc_np (EUSERS), "Too many users"); ++ TEST_COMPARE_STRING (strerrorname_np (EUSERS), "EUSERS"); ++#endif ++#ifdef EDQUOT ++ TEST_COMPARE_STRING (strerrordesc_np (EDQUOT), "Disk quota exceeded"); ++ TEST_COMPARE_STRING (strerrorname_np (EDQUOT), "EDQUOT"); ++#endif ++#ifdef ESTALE ++ TEST_COMPARE_STRING (strerrordesc_np (ESTALE), "Stale file handle"); ++ TEST_COMPARE_STRING (strerrorname_np (ESTALE), "ESTALE"); ++#endif ++#ifdef EREMOTE ++ TEST_COMPARE_STRING (strerrordesc_np (EREMOTE), "Object is remote"); ++ TEST_COMPARE_STRING (strerrorname_np (EREMOTE), "EREMOTE"); ++#endif ++#ifdef ENOLCK ++ TEST_COMPARE_STRING (strerrordesc_np (ENOLCK), "No locks available"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOLCK), "ENOLCK"); ++#endif ++#ifdef ENOSYS ++ TEST_COMPARE_STRING (strerrordesc_np (ENOSYS), "Function not implemented"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOSYS), "ENOSYS"); ++#endif ++#ifdef EILSEQ ++ TEST_COMPARE_STRING 
(strerrordesc_np (EILSEQ), ++ "Invalid or incomplete multibyte or wide character"); ++ TEST_COMPARE_STRING (strerrorname_np (EILSEQ), "EILSEQ"); ++#endif ++#ifdef EBADMSG ++ TEST_COMPARE_STRING (strerrordesc_np (EBADMSG), "Bad message"); ++ TEST_COMPARE_STRING (strerrorname_np (EBADMSG), "EBADMSG"); ++#endif ++#ifdef EIDRM ++ TEST_COMPARE_STRING (strerrordesc_np (EIDRM), "Identifier removed"); ++ TEST_COMPARE_STRING (strerrorname_np (EIDRM), "EIDRM"); ++#endif ++#ifdef EMULTIHOP ++ TEST_COMPARE_STRING (strerrordesc_np (EMULTIHOP), "Multihop attempted"); ++ TEST_COMPARE_STRING (strerrorname_np (EMULTIHOP), "EMULTIHOP"); ++#endif ++#ifdef ENODATA ++ TEST_COMPARE_STRING (strerrordesc_np (ENODATA), "No data available"); ++ TEST_COMPARE_STRING (strerrorname_np (ENODATA), "ENODATA"); ++#endif ++#ifdef ENOLINK ++ TEST_COMPARE_STRING (strerrordesc_np (ENOLINK), "Link has been severed"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOLINK), "ENOLINK"); ++#endif ++#ifdef ENOMSG ++ TEST_COMPARE_STRING (strerrordesc_np (ENOMSG), ++ "No message of desired type"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOMSG), "ENOMSG"); ++#endif ++#ifdef ENOSR ++ TEST_COMPARE_STRING (strerrordesc_np (ENOSR), "Out of streams resources"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOSR), "ENOSR"); ++#endif ++#ifdef ENOSTR ++ TEST_COMPARE_STRING (strerrordesc_np (ENOSTR), "Device not a stream"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOSTR), "ENOSTR"); ++#endif ++#ifdef EOVERFLOW ++ TEST_COMPARE_STRING (strerrordesc_np (EOVERFLOW), ++ "Value too large for defined data type"); ++ TEST_COMPARE_STRING (strerrorname_np (EOVERFLOW), "EOVERFLOW"); ++#endif ++#ifdef EPROTO ++ TEST_COMPARE_STRING (strerrordesc_np (EPROTO), "Protocol error"); ++ TEST_COMPARE_STRING (strerrorname_np (EPROTO), "EPROTO"); ++#endif ++#ifdef ETIME ++ TEST_COMPARE_STRING (strerrordesc_np (ETIME), "Timer expired"); ++ TEST_COMPARE_STRING (strerrorname_np (ETIME), "ETIME"); ++#endif ++#ifdef ECANCELED ++ TEST_COMPARE_STRING 
(strerrordesc_np (ECANCELED), "Operation canceled"); ++ TEST_COMPARE_STRING (strerrorname_np (ECANCELED), "ECANCELED"); ++#endif ++#ifdef EOWNERDEAD ++ TEST_COMPARE_STRING (strerrordesc_np (EOWNERDEAD), "Owner died"); ++ TEST_COMPARE_STRING (strerrorname_np (EOWNERDEAD), "EOWNERDEAD"); ++#endif ++#ifdef ENOTRECOVERABLE ++ TEST_COMPARE_STRING (strerrordesc_np (ENOTRECOVERABLE), ++ "State not recoverable"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOTRECOVERABLE), "ENOTRECOVERABLE"); ++#endif ++#ifdef ERESTART ++ TEST_COMPARE_STRING (strerrordesc_np (ERESTART), ++ "Interrupted system call should be restarted"); ++ TEST_COMPARE_STRING (strerrorname_np (ERESTART), "ERESTART"); ++#endif ++#ifdef ECHRNG ++ TEST_COMPARE_STRING (strerrordesc_np (ECHRNG), ++ "Channel number out of range"); ++ TEST_COMPARE_STRING (strerrorname_np (ECHRNG), "ECHRNG"); ++#endif ++#ifdef EL2NSYNC ++ TEST_COMPARE_STRING (strerrordesc_np (EL2NSYNC), ++ "Level 2 not synchronized"); ++ TEST_COMPARE_STRING (strerrorname_np (EL2NSYNC), "EL2NSYNC"); ++#endif ++#ifdef EL3HLT ++ TEST_COMPARE_STRING (strerrordesc_np (EL3HLT), "Level 3 halted"); ++ TEST_COMPARE_STRING (strerrorname_np (EL3HLT), "EL3HLT"); ++#endif ++#ifdef EL3RST ++ TEST_COMPARE_STRING (strerrordesc_np (EL3RST), "Level 3 reset"); ++ TEST_COMPARE_STRING (strerrorname_np (EL3RST), "EL3RST"); ++#endif ++#ifdef ELNRNG ++ TEST_COMPARE_STRING (strerrordesc_np (ELNRNG), "Link number out of range"); ++ TEST_COMPARE_STRING (strerrorname_np (ELNRNG), "ELNRNG"); ++#endif ++#ifdef EUNATCH ++ TEST_COMPARE_STRING (strerrordesc_np (EUNATCH), ++ "Protocol driver not attached"); ++ TEST_COMPARE_STRING (strerrorname_np (EUNATCH), "EUNATCH"); ++#endif ++#ifdef ENOCSI ++ TEST_COMPARE_STRING (strerrordesc_np (ENOCSI), ++ "No CSI structure available"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOCSI), "ENOCSI"); ++#endif ++#ifdef EL2HLT ++ TEST_COMPARE_STRING (strerrordesc_np (EL2HLT), "Level 2 halted"); ++ TEST_COMPARE_STRING (strerrorname_np (EL2HLT), 
"EL2HLT"); ++#endif ++#ifdef EBADE ++ TEST_COMPARE_STRING (strerrordesc_np (EBADE), "Invalid exchange"); ++ TEST_COMPARE_STRING (strerrorname_np (EBADE), "EBADE"); ++#endif ++#ifdef EBADR ++ TEST_COMPARE_STRING (strerrordesc_np (EBADR), ++ "Invalid request descriptor"); ++ TEST_COMPARE_STRING (strerrorname_np (EBADR), "EBADR"); ++#endif ++#ifdef EXFULL ++ TEST_COMPARE_STRING (strerrordesc_np (EXFULL), "Exchange full"); ++ TEST_COMPARE_STRING (strerrorname_np (EXFULL), "EXFULL"); ++#endif ++#ifdef ENOANO ++ TEST_COMPARE_STRING (strerrordesc_np (ENOANO), "No anode"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOANO), "ENOANO"); ++#endif ++#ifdef EBADRQC ++ TEST_COMPARE_STRING (strerrordesc_np (EBADRQC), "Invalid request code"); ++ TEST_COMPARE_STRING (strerrorname_np (EBADRQC), "EBADRQC"); ++#endif ++#ifdef EBADSLT ++ TEST_COMPARE_STRING (strerrordesc_np (EBADSLT), "Invalid slot"); ++ TEST_COMPARE_STRING (strerrorname_np (EBADSLT), "EBADSLT"); ++#endif ++#ifdef EBFONT ++ TEST_COMPARE_STRING (strerrordesc_np (EBFONT), "Bad font file format"); ++ TEST_COMPARE_STRING (strerrorname_np (EBFONT), "EBFONT"); ++#endif ++#ifdef ENONET ++ TEST_COMPARE_STRING (strerrordesc_np (ENONET), ++ "Machine is not on the network"); ++ TEST_COMPARE_STRING (strerrorname_np (ENONET), "ENONET"); ++#endif ++#ifdef ENOPKG ++ TEST_COMPARE_STRING (strerrordesc_np (ENOPKG), "Package not installed"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOPKG), "ENOPKG"); ++#endif ++#ifdef EADV ++ TEST_COMPARE_STRING (strerrordesc_np (EADV), "Advertise error"); ++ TEST_COMPARE_STRING (strerrorname_np (EADV), "EADV"); ++#endif ++#ifdef ESRMNT ++ TEST_COMPARE_STRING (strerrordesc_np (ESRMNT), "Srmount error"); ++ TEST_COMPARE_STRING (strerrorname_np (ESRMNT), "ESRMNT"); ++#endif ++#ifdef ECOMM ++ TEST_COMPARE_STRING (strerrordesc_np (ECOMM), ++ "Communication error on send"); ++ TEST_COMPARE_STRING (strerrorname_np (ECOMM), "ECOMM"); ++#endif ++#ifdef EDOTDOT ++ TEST_COMPARE_STRING (strerrordesc_np (EDOTDOT), 
"RFS specific error"); ++ TEST_COMPARE_STRING (strerrorname_np (EDOTDOT), "EDOTDOT"); ++#endif ++#ifdef ENOTUNIQ ++ TEST_COMPARE_STRING (strerrordesc_np (ENOTUNIQ), ++ "Name not unique on network"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOTUNIQ), "ENOTUNIQ"); ++#endif ++#ifdef EBADFD ++ TEST_COMPARE_STRING (strerrordesc_np (EBADFD), ++ "File descriptor in bad state"); ++ TEST_COMPARE_STRING (strerrorname_np (EBADFD), "EBADFD"); ++#endif ++#ifdef EREMCHG ++ TEST_COMPARE_STRING (strerrordesc_np (EREMCHG), "Remote address changed"); ++ TEST_COMPARE_STRING (strerrorname_np (EREMCHG), "EREMCHG"); ++#endif ++#ifdef ELIBACC ++ TEST_COMPARE_STRING (strerrordesc_np (ELIBACC), ++ "Can not access a needed shared library"); ++ TEST_COMPARE_STRING (strerrorname_np (ELIBACC), "ELIBACC"); ++#endif ++#ifdef ELIBBAD ++ TEST_COMPARE_STRING (strerrordesc_np (ELIBBAD), ++ "Accessing a corrupted shared library"); ++ TEST_COMPARE_STRING (strerrorname_np (ELIBBAD), "ELIBBAD"); ++#endif ++#ifdef ELIBSCN ++ TEST_COMPARE_STRING (strerrordesc_np (ELIBSCN), ++ ".lib section in a.out corrupted"); ++ TEST_COMPARE_STRING (strerrorname_np (ELIBSCN), "ELIBSCN"); ++#endif ++#ifdef ELIBMAX ++ TEST_COMPARE_STRING (strerrordesc_np (ELIBMAX), ++ "Attempting to link in too many shared libraries"); ++ TEST_COMPARE_STRING (strerrorname_np (ELIBMAX), "ELIBMAX"); ++#endif ++#ifdef ELIBEXEC ++ TEST_COMPARE_STRING (strerrordesc_np (ELIBEXEC), ++ "Cannot exec a shared library directly"); ++ TEST_COMPARE_STRING (strerrorname_np (ELIBEXEC), "ELIBEXEC"); ++#endif ++#ifdef ESTRPIPE ++ TEST_COMPARE_STRING (strerrordesc_np (ESTRPIPE), "Streams pipe error"); ++ TEST_COMPARE_STRING (strerrorname_np (ESTRPIPE), "ESTRPIPE"); ++#endif ++#ifdef EUCLEAN ++ TEST_COMPARE_STRING (strerrordesc_np (EUCLEAN), ++ "Structure needs cleaning"); ++ TEST_COMPARE_STRING (strerrorname_np (EUCLEAN), "EUCLEAN"); ++#endif ++#ifdef ENOTNAM ++ TEST_COMPARE_STRING (strerrordesc_np (ENOTNAM), ++ "Not a XENIX named type file"); ++ 
TEST_COMPARE_STRING (strerrorname_np (ENOTNAM), "ENOTNAM"); ++#endif ++#ifdef ENAVAIL ++ TEST_COMPARE_STRING (strerrordesc_np (ENAVAIL), ++ "No XENIX semaphores available"); ++ TEST_COMPARE_STRING (strerrorname_np (ENAVAIL), "ENAVAIL"); ++#endif ++#ifdef EISNAM ++ TEST_COMPARE_STRING (strerrordesc_np (EISNAM), "Is a named type file"); ++ TEST_COMPARE_STRING (strerrorname_np (EISNAM), "EISNAM"); ++#endif ++#ifdef EREMOTEIO ++ TEST_COMPARE_STRING (strerrordesc_np (EREMOTEIO), "Remote I/O error"); ++ TEST_COMPARE_STRING (strerrorname_np (EREMOTEIO), "EREMOTEIO"); ++#endif ++#ifdef ENOMEDIUM ++ TEST_COMPARE_STRING (strerrordesc_np (ENOMEDIUM), "No medium found"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOMEDIUM), "ENOMEDIUM"); ++#endif ++#ifdef EMEDIUMTYPE ++ TEST_COMPARE_STRING (strerrordesc_np (EMEDIUMTYPE), "Wrong medium type"); ++ TEST_COMPARE_STRING (strerrorname_np (EMEDIUMTYPE), "EMEDIUMTYPE"); ++#endif ++#ifdef ENOKEY ++ TEST_COMPARE_STRING (strerrordesc_np (ENOKEY), ++ "Required key not available"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOKEY), "ENOKEY"); ++#endif ++#ifdef EKEYEXPIRED ++ TEST_COMPARE_STRING (strerrordesc_np (EKEYEXPIRED), "Key has expired"); ++ TEST_COMPARE_STRING (strerrorname_np (EKEYEXPIRED), "EKEYEXPIRED"); ++#endif ++#ifdef EKEYREVOKED ++ TEST_COMPARE_STRING (strerrordesc_np (EKEYREVOKED), ++ "Key has been revoked"); ++ TEST_COMPARE_STRING (strerrorname_np (EKEYREVOKED), "EKEYREVOKED"); ++#endif ++#ifdef EKEYREJECTED ++ TEST_COMPARE_STRING (strerrordesc_np (EKEYREJECTED), ++ "Key was rejected by service"); ++ TEST_COMPARE_STRING (strerrorname_np (EKEYREJECTED), "EKEYREJECTED"); ++#endif ++#ifdef ERFKILL ++ TEST_COMPARE_STRING (strerrordesc_np (ERFKILL), ++ "Operation not possible due to RF-kill"); ++ TEST_COMPARE_STRING (strerrorname_np (ERFKILL), "ERFKILL"); ++#endif ++#ifdef EHWPOISON ++ TEST_COMPARE_STRING (strerrordesc_np (EHWPOISON), ++ "Memory page has hardware error"); ++ TEST_COMPARE_STRING (strerrorname_np (EHWPOISON), 
"EHWPOISON"); ++#endif ++#ifdef EBADRPC ++ TEST_COMPARE_STRING (strerrordesc_np (EBADRPC), "RPC struct is bad"); ++ TEST_COMPARE_STRING (strerrorname_np (EBADRPC), "EBADRPC"); ++#endif ++#ifdef EFTYPE ++ TEST_COMPARE_STRING (strerrordesc_np (EFTYPE), ++ "Inappropriate file type or format"); ++ TEST_COMPARE_STRING (strerrorname_np (EFTYPE), "EFTYPE"); ++#endif ++#ifdef EPROCUNAVAIL ++ TEST_COMPARE_STRING (strerrordesc_np (EPROCUNAVAIL), ++ "RPC bad procedure for program"); ++ TEST_COMPARE_STRING (strerrorname_np (EPROCUNAVAIL), "EPROCUNAVAIL"); ++#endif ++#ifdef EAUTH ++ TEST_COMPARE_STRING (strerrordesc_np (EAUTH), "Authentication error"); ++ TEST_COMPARE_STRING (strerrorname_np (EAUTH), "EAUTH"); ++#endif ++#ifdef EDIED ++ TEST_COMPARE_STRING (strerrordesc_np (EDIED), "Translator died"); ++ TEST_COMPARE_STRING (strerrorname_np (EDIED), "EDIED"); ++#endif ++#ifdef ERPCMISMATCH ++ TEST_COMPARE_STRING (strerrordesc_np (ERPCMISMATCH), "RPC version wrong"); ++ TEST_COMPARE_STRING (strerrorname_np (ERPCMISMATCH), "ERPCMISMATCH"); ++#endif ++#ifdef EGREGIOUS ++ TEST_COMPARE_STRING (strerrordesc_np (EGREGIOUS), ++ "You really blew it this time"); ++ TEST_COMPARE_STRING (strerrorname_np (EGREGIOUS), "EGREGIOUS"); ++#endif ++#ifdef EPROCLIM ++ TEST_COMPARE_STRING (strerrordesc_np (EPROCLIM), "Too many processes"); ++ TEST_COMPARE_STRING (strerrorname_np (EPROCLIM), "EPROCLIM"); ++#endif ++#ifdef EGRATUITOUS ++ TEST_COMPARE_STRING (strerrordesc_np (EGRATUITOUS), "Gratuitous error"); ++ TEST_COMPARE_STRING (strerrorname_np (EGRATUITOUS), "EGRATUITOUS"); ++#endif ++#if defined (ENOTSUP) && ENOTSUP != EOPNOTSUPP ++ TEST_COMPARE_STRING (strerrordesc_np (ENOTSUP), "Not supported"); ++ TEST_COMPARE_STRING (strerrorname_np (ENOTSUP), "ENOTSUP"); ++#endif ++#ifdef EPROGMISMATCH ++ TEST_COMPARE_STRING (strerrordesc_np (EPROGMISMATCH), ++ "RPC program version wrong"); ++ TEST_COMPARE_STRING (strerrorname_np (EPROGMISMATCH), "EPROGMISMATCH"); ++#endif ++#ifdef EBACKGROUND ++ 
TEST_COMPARE_STRING (strerrordesc_np (EBACKGROUND), ++ "Inappropriate operation for background process"); ++ TEST_COMPARE_STRING (strerrorname_np (EBACKGROUND), "EBACKGROUND"); ++#endif ++#ifdef EIEIO ++ TEST_COMPARE_STRING (strerrordesc_np (EIEIO), "Computer bought the farm"); ++ TEST_COMPARE_STRING (strerrorname_np (EIEIO), "EIEIO"); ++#endif ++#if defined (EWOULDBLOCK) && EWOULDBLOCK != EAGAIN ++ TEST_COMPARE_STRING (strerrordesc_np (EWOULDBLOCK), ++ "Operation would block"); ++ TEST_COMPARE_STRING (strerrorname_np (EWOULDBLOCK), "EWOULDBLOCK"); ++#endif ++#ifdef ENEEDAUTH ++ TEST_COMPARE_STRING (strerrordesc_np (ENEEDAUTH), "Need authenticator"); ++ TEST_COMPARE_STRING (strerrorname_np (ENEEDAUTH), "ENEEDAUTH"); ++#endif ++#ifdef ED ++ TEST_COMPARE_STRING (strerrordesc_np (ED), "?"); ++ TEST_COMPARE_STRING (strerrorname_np (ED), "ED"); ++#endif ++#ifdef EPROGUNAVAIL ++ TEST_COMPARE_STRING (strerrordesc_np (EPROGUNAVAIL), ++ "RPC program not available"); ++ TEST_COMPARE_STRING (strerrorname_np (EPROGUNAVAIL), "EPROGUNAVAIL"); ++#endif + + return 0; + } +diff --git a/stdio-common/vfscanf-internal.c b/stdio-common/vfscanf-internal.c +index 95b46dcbeb..3a323547f9 100644 +--- a/stdio-common/vfscanf-internal.c ++++ b/stdio-common/vfscanf-internal.c +@@ -277,7 +277,7 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, + #endif + { + va_list arg; +- const CHAR_T *f = format; ++ const UCHAR_T *f = (const UCHAR_T *) format; + UCHAR_T fc; /* Current character of the format. */ + WINT_T done = 0; /* Assignments done. */ + size_t read_in = 0; /* Chars read in. */ +@@ -415,10 +415,11 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, + #endif + + #ifndef COMPILE_WSCANF +- if (!isascii ((unsigned char) *f)) ++ if (!isascii (*f)) + { + /* Non-ASCII, may be a multibyte. 
*/ +- int len = __mbrlen (f, strlen (f), &state); ++ int len = __mbrlen ((const char *) f, strlen ((const char *) f), ++ &state); + if (len > 0) + { + do +@@ -426,7 +427,7 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, + c = inchar (); + if (__glibc_unlikely (c == EOF)) + input_error (); +- else if (c != (unsigned char) *f++) ++ else if (c != *f++) + { + ungetc_not_eof (c, s); + conv_error (); +@@ -484,9 +485,9 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, + char_buffer_rewind (&charbuf); + + /* Check for a positional parameter specification. */ +- if (ISDIGIT ((UCHAR_T) *f)) ++ if (ISDIGIT (*f)) + { +- argpos = read_int ((const UCHAR_T **) &f); ++ argpos = read_int (&f); + if (*f == L_('$')) + ++f; + else +@@ -521,8 +522,8 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, + + /* Find the maximum field width. */ + width = 0; +- if (ISDIGIT ((UCHAR_T) *f)) +- width = read_int ((const UCHAR_T **) &f); ++ if (ISDIGIT (*f)) ++ width = read_int (&f); + got_width: + if (width == 0) + width = -1; +@@ -2522,12 +2523,11 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, + } + + while ((fc = *f++) != '\0' && fc != ']') +- if (fc == '-' && *f != '\0' && *f != ']' +- && (unsigned char) f[-2] <= (unsigned char) *f) ++ if (fc == '-' && *f != '\0' && *f != ']' && f[-2] <= *f) + { + /* Add all characters from the one before the '-' + up to (but not including) the next format char. 
*/ +- for (fc = (unsigned char) f[-2]; fc < (unsigned char) *f; ++fc) ++ for (fc = f[-2]; fc < *f; ++fc) + ((char *)charbuf.scratch.data)[fc] = 1; + } + else +diff --git a/stdlib/tst-secure-getenv.c b/stdlib/tst-secure-getenv.c +index 3cfe9a05c3..d4b1139c5e 100644 +--- a/stdlib/tst-secure-getenv.c ++++ b/stdlib/tst-secure-getenv.c +@@ -30,167 +30,12 @@ + #include <sys/wait.h> + #include <unistd.h> + ++#include <support/check.h> + #include <support/support.h> ++#include <support/capture_subprocess.h> + #include <support/test-driver.h> + + static char MAGIC_ARGUMENT[] = "run-actual-test"; +-#define MAGIC_STATUS 19 +- +-/* Return a GID which is not our current GID, but is present in the +- supplementary group list. */ +-static gid_t +-choose_gid (void) +-{ +- int count = getgroups (0, NULL); +- if (count < 0) +- { +- printf ("getgroups: %m\n"); +- exit (1); +- } +- gid_t *groups; +- groups = xcalloc (count, sizeof (*groups)); +- int ret = getgroups (count, groups); +- if (ret < 0) +- { +- printf ("getgroups: %m\n"); +- exit (1); +- } +- gid_t current = getgid (); +- gid_t not_current = 0; +- for (int i = 0; i < ret; ++i) +- { +- if (groups[i] != current) +- { +- not_current = groups[i]; +- break; +- } +- } +- free (groups); +- return not_current; +-} +- +- +-/* Copies the executable into a restricted directory, so that we can +- safely make it SGID with the TARGET group ID. Then runs the +- executable. 
*/ +-static int +-run_executable_sgid (gid_t target) +-{ +- char *dirname = xasprintf ("%s/secure-getenv.%jd", +- test_dir, (intmax_t) getpid ()); +- char *execname = xasprintf ("%s/bin", dirname); +- int infd = -1; +- int outfd = -1; +- int ret = -1; +- if (mkdir (dirname, 0700) < 0) +- { +- printf ("mkdir: %m\n"); +- goto err; +- } +- infd = open ("/proc/self/exe", O_RDONLY); +- if (infd < 0) +- { +- printf ("open (/proc/self/exe): %m\n"); +- goto err; +- } +- outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700); +- if (outfd < 0) +- { +- printf ("open (%s): %m\n", execname); +- goto err; +- } +- char buf[4096]; +- for (;;) +- { +- ssize_t rdcount = read (infd, buf, sizeof (buf)); +- if (rdcount < 0) +- { +- printf ("read: %m\n"); +- goto err; +- } +- if (rdcount == 0) +- break; +- char *p = buf; +- char *end = buf + rdcount; +- while (p != end) +- { +- ssize_t wrcount = write (outfd, buf, end - p); +- if (wrcount == 0) +- errno = ENOSPC; +- if (wrcount <= 0) +- { +- printf ("write: %m\n"); +- goto err; +- } +- p += wrcount; +- } +- } +- if (fchown (outfd, getuid (), target) < 0) +- { +- printf ("fchown (%s): %m\n", execname); +- goto err; +- } +- if (fchmod (outfd, 02750) < 0) +- { +- printf ("fchmod (%s): %m\n", execname); +- goto err; +- } +- if (close (outfd) < 0) +- { +- printf ("close (outfd): %m\n"); +- goto err; +- } +- if (close (infd) < 0) +- { +- printf ("close (infd): %m\n"); +- goto err; +- } +- +- int kid = fork (); +- if (kid < 0) +- { +- printf ("fork: %m\n"); +- goto err; +- } +- if (kid == 0) +- { +- /* Child process. 
*/ +- char *args[] = { execname, MAGIC_ARGUMENT, NULL }; +- execve (execname, args, environ); +- printf ("execve (%s): %m\n", execname); +- _exit (1); +- } +- int status; +- if (waitpid (kid, &status, 0) < 0) +- { +- printf ("waitpid: %m\n"); +- goto err; +- } +- if (!WIFEXITED (status) || WEXITSTATUS (status) != MAGIC_STATUS) +- { +- printf ("Unexpected exit status %d from child process\n", +- status); +- goto err; +- } +- ret = 0; +- +-err: +- if (outfd >= 0) +- close (outfd); +- if (infd >= 0) +- close (infd); +- if (execname) +- { +- unlink (execname); +- free (execname); +- } +- if (dirname) +- { +- rmdir (dirname); +- free (dirname); +- } +- return ret; +-} + + static int + do_test (void) +@@ -212,15 +57,15 @@ do_test (void) + exit (1); + } + +- gid_t target = choose_gid (); +- if (target == 0) +- { +- fprintf (stderr, +- "Could not find a suitable GID for user %jd, skipping test\n", +- (intmax_t) getuid ()); +- exit (0); +- } +- return run_executable_sgid (target); ++ int status = support_capture_subprogram_self_sgid (MAGIC_ARGUMENT); ++ ++ if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) ++ return EXIT_UNSUPPORTED; ++ ++ if (!WIFEXITED (status)) ++ FAIL_EXIT1 ("Unexpected exit status %d from child process\n", status); ++ ++ return 0; + } + + static void +@@ -229,23 +74,15 @@ alternative_main (int argc, char **argv) + if (argc == 2 && strcmp (argv[1], MAGIC_ARGUMENT) == 0) + { + if (getgid () == getegid ()) +- { +- /* This can happen if the file system is mounted nosuid. */ +- fprintf (stderr, "SGID failed: GID and EGID match (%jd)\n", +- (intmax_t) getgid ()); +- exit (MAGIC_STATUS); +- } ++ /* This can happen if the file system is mounted nosuid. 
*/ ++ FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n", ++ (intmax_t) getgid ()); + if (getenv ("PATH") == NULL) +- { +- printf ("PATH variable not present\n"); +- exit (3); +- } ++ FAIL_EXIT (3, "PATH variable not present\n"); + if (secure_getenv ("PATH") != NULL) +- { +- printf ("PATH variable not filtered out\n"); +- exit (4); +- } +- exit (MAGIC_STATUS); ++ FAIL_EXIT (4, "PATH variable not filtered out\n"); ++ ++ exit (EXIT_SUCCESS); + } + } + +diff --git a/string/bits/string_fortified.h b/string/bits/string_fortified.h +index 309d0f39b2..c8d3051af8 100644 +--- a/string/bits/string_fortified.h ++++ b/string/bits/string_fortified.h +@@ -22,11 +22,6 @@ + # error "Never use <bits/string_fortified.h> directly; include <string.h> instead." + #endif + +-#if !__GNUC_PREREQ (5,0) +-__warndecl (__warn_memset_zero_len, +- "memset used with constant zero length parameter; this could be due to transposed parameters"); +-#endif +- + __fortify_function void * + __NTH (memcpy (void *__restrict __dest, const void *__restrict __src, + size_t __len)) +@@ -58,16 +53,6 @@ __NTH (mempcpy (void *__restrict __dest, const void *__restrict __src, + __fortify_function void * + __NTH (memset (void *__dest, int __ch, size_t __len)) + { +- /* GCC-5.0 and newer implements these checks in the compiler, so we don't +- need them here. 
*/ +-#if !__GNUC_PREREQ (5,0) +- if (__builtin_constant_p (__len) && __len == 0 +- && (!__builtin_constant_p (__ch) || __ch != 0)) +- { +- __warn_memset_zero_len (); +- return __dest; +- } +-#endif + return __builtin___memset_chk (__dest, __ch, __len, __bos0 (__dest)); + } + +diff --git a/string/test-memchr.c b/string/test-memchr.c +index 5dd0aa5470..de70e794d9 100644 +--- a/string/test-memchr.c ++++ b/string/test-memchr.c +@@ -65,8 +65,8 @@ do_one_test (impl_t *impl, const CHAR *s, int c, size_t n, CHAR *exp_res) + CHAR *res = CALL (impl, s, c, n); + if (res != exp_res) + { +- error (0, 0, "Wrong result in function %s %p %p", impl->name, +- res, exp_res); ++ error (0, 0, "Wrong result in function %s (%p, %d, %zu) -> %p != %p", ++ impl->name, s, c, n, res, exp_res); + ret = 1; + return; + } +@@ -91,7 +91,7 @@ do_test (size_t align, size_t pos, size_t len, size_t n, int seek_char) + } + buf[align + len] = 0; + +- if (pos < len) ++ if (pos < MIN(n, len)) + { + buf[align + pos] = seek_char; + buf[align + len] = -seek_char; +@@ -107,6 +107,38 @@ do_test (size_t align, size_t pos, size_t len, size_t n, int seek_char) + do_one_test (impl, (CHAR *) (buf + align), seek_char, n, result); + } + ++static void ++do_overflow_tests (void) ++{ ++ size_t i, j, len; ++ const size_t one = 1; ++ uintptr_t buf_addr = (uintptr_t) buf1; ++ ++ for (i = 0; i < 750; ++i) ++ { ++ do_test (0, i, 751, SIZE_MAX - i, BIG_CHAR); ++ do_test (0, i, 751, i - buf_addr, BIG_CHAR); ++ do_test (0, i, 751, -buf_addr - i, BIG_CHAR); ++ do_test (0, i, 751, SIZE_MAX - buf_addr - i, BIG_CHAR); ++ do_test (0, i, 751, SIZE_MAX - buf_addr + i, BIG_CHAR); ++ ++ len = 0; ++ for (j = 8 * sizeof(size_t) - 1; j ; --j) ++ { ++ len |= one << j; ++ do_test (0, i, 751, len - i, BIG_CHAR); ++ do_test (0, i, 751, len + i, BIG_CHAR); ++ do_test (0, i, 751, len - buf_addr - i, BIG_CHAR); ++ do_test (0, i, 751, len - buf_addr + i, BIG_CHAR); ++ ++ do_test (0, i, 751, ~len - i, BIG_CHAR); ++ do_test (0, i, 751, ~len + i, 
BIG_CHAR); ++ do_test (0, i, 751, ~len - buf_addr - i, BIG_CHAR); ++ do_test (0, i, 751, ~len - buf_addr + i, BIG_CHAR); ++ } ++ } ++} ++ + static void + do_random_tests (void) + { +@@ -221,6 +253,7 @@ test_main (void) + do_test (page_size / 2 - i, i, i, 1, 0x9B); + + do_random_tests (); ++ do_overflow_tests (); + return ret; + } + +diff --git a/string/test-strncat.c b/string/test-strncat.c +index abbacb95c6..0c7f68d086 100644 +--- a/string/test-strncat.c ++++ b/string/test-strncat.c +@@ -134,6 +134,66 @@ do_test (size_t align1, size_t align2, size_t len1, size_t len2, + } + } + ++static void ++do_overflow_tests (void) ++{ ++ size_t i, j, len; ++ const size_t one = 1; ++ CHAR *s1, *s2; ++ uintptr_t s1_addr; ++ s1 = (CHAR *) buf1; ++ s2 = (CHAR *) buf2; ++ s1_addr = (uintptr_t)s1; ++ for (j = 0; j < 200; ++j) ++ s2[j] = 32 + 23 * j % (BIG_CHAR - 32); ++ s2[200] = 0; ++ for (i = 0; i < 750; ++i) { ++ for (j = 0; j < i; ++j) ++ s1[j] = 32 + 23 * j % (BIG_CHAR - 32); ++ s1[i] = '\0'; ++ ++ FOR_EACH_IMPL (impl, 0) ++ { ++ s2[200] = '\0'; ++ do_one_test (impl, s2, s1, SIZE_MAX - i); ++ s2[200] = '\0'; ++ do_one_test (impl, s2, s1, i - s1_addr); ++ s2[200] = '\0'; ++ do_one_test (impl, s2, s1, -s1_addr - i); ++ s2[200] = '\0'; ++ do_one_test (impl, s2, s1, SIZE_MAX - s1_addr - i); ++ s2[200] = '\0'; ++ do_one_test (impl, s2, s1, SIZE_MAX - s1_addr + i); ++ } ++ ++ len = 0; ++ for (j = 8 * sizeof(size_t) - 1; j ; --j) ++ { ++ len |= one << j; ++ FOR_EACH_IMPL (impl, 0) ++ { ++ s2[200] = '\0'; ++ do_one_test (impl, s2, s1, len - i); ++ s2[200] = '\0'; ++ do_one_test (impl, s2, s1, len + i); ++ s2[200] = '\0'; ++ do_one_test (impl, s2, s1, len - s1_addr - i); ++ s2[200] = '\0'; ++ do_one_test (impl, s2, s1, len - s1_addr + i); ++ ++ s2[200] = '\0'; ++ do_one_test (impl, s2, s1, ~len - i); ++ s2[200] = '\0'; ++ do_one_test (impl, s2, s1, ~len + i); ++ s2[200] = '\0'; ++ do_one_test (impl, s2, s1, ~len - s1_addr - i); ++ s2[200] = '\0'; ++ do_one_test (impl, s2, s1, ~len - 
s1_addr + i); ++ } ++ } ++ } ++} ++ + static void + do_random_tests (void) + { +@@ -316,6 +376,7 @@ test_main (void) + } + + do_random_tests (); ++ do_overflow_tests (); + return ret; + } + +diff --git a/string/test-strnlen.c b/string/test-strnlen.c +index 80ac9e8602..a1a6746cc9 100644 +--- a/string/test-strnlen.c ++++ b/string/test-strnlen.c +@@ -27,6 +27,7 @@ + + #ifndef WIDE + # define STRNLEN strnlen ++# define MEMSET memset + # define CHAR char + # define BIG_CHAR CHAR_MAX + # define MIDDLE_CHAR 127 +@@ -34,6 +35,7 @@ + #else + # include <wchar.h> + # define STRNLEN wcsnlen ++# define MEMSET wmemset + # define CHAR wchar_t + # define BIG_CHAR WCHAR_MAX + # define MIDDLE_CHAR 1121 +@@ -87,6 +89,38 @@ do_test (size_t align, size_t len, size_t maxlen, int max_char) + do_one_test (impl, (CHAR *) (buf + align), maxlen, MIN (len, maxlen)); + } + ++static void ++do_overflow_tests (void) ++{ ++ size_t i, j, len; ++ const size_t one = 1; ++ uintptr_t buf_addr = (uintptr_t) buf1; ++ ++ for (i = 0; i < 750; ++i) ++ { ++ do_test (0, i, SIZE_MAX - i, BIG_CHAR); ++ do_test (0, i, i - buf_addr, BIG_CHAR); ++ do_test (0, i, -buf_addr - i, BIG_CHAR); ++ do_test (0, i, SIZE_MAX - buf_addr - i, BIG_CHAR); ++ do_test (0, i, SIZE_MAX - buf_addr + i, BIG_CHAR); ++ ++ len = 0; ++ for (j = 8 * sizeof(size_t) - 1; j ; --j) ++ { ++ len |= one << j; ++ do_test (0, i, len - i, BIG_CHAR); ++ do_test (0, i, len + i, BIG_CHAR); ++ do_test (0, i, len - buf_addr - i, BIG_CHAR); ++ do_test (0, i, len - buf_addr + i, BIG_CHAR); ++ ++ do_test (0, i, ~len - i, BIG_CHAR); ++ do_test (0, i, ~len + i, BIG_CHAR); ++ do_test (0, i, ~len - buf_addr - i, BIG_CHAR); ++ do_test (0, i, ~len - buf_addr + i, BIG_CHAR); ++ } ++ } ++} ++ + static void + do_random_tests (void) + { +@@ -153,7 +187,7 @@ do_page_tests (void) + size_t last_offset = (page_size / sizeof (CHAR)) - 1; + + CHAR *s = (CHAR *) buf2; +- memset (s, 65, (last_offset - 1)); ++ MEMSET (s, 65, (last_offset - 1)); + s[last_offset] = 0; + + /* 
Place short strings ending at page boundary. */ +@@ -196,6 +230,35 @@ do_page_tests (void) + } + } + ++/* Tests meant to unveil fail on implementations that access bytes ++ beyond the maxium length. */ ++ ++static void ++do_page_2_tests (void) ++{ ++ size_t i, exp_len, offset; ++ size_t last_offset = page_size / sizeof (CHAR); ++ ++ CHAR *s = (CHAR *) buf2; ++ MEMSET (s, 65, last_offset); ++ ++ /* Place short strings ending at page boundary without the null ++ byte. */ ++ offset = last_offset; ++ for (i = 0; i < 128; i++) ++ { ++ /* Decrease offset to stress several sizes and alignments. */ ++ offset--; ++ exp_len = last_offset - offset; ++ FOR_EACH_IMPL (impl, 0) ++ { ++ /* If an implementation goes beyond EXP_LEN, it will trigger ++ the segfault. */ ++ do_one_test (impl, (CHAR *) (s + offset), exp_len, exp_len); ++ } ++ } ++} ++ + int + test_main (void) + { +@@ -242,6 +305,8 @@ test_main (void) + + do_random_tests (); + do_page_tests (); ++ do_page_2_tests (); ++ do_overflow_tests (); + return ret; + } + +diff --git a/support/Makefile b/support/Makefile +index 93faafddf9..3d3aff5ff9 100644 +--- a/support/Makefile ++++ b/support/Makefile +@@ -35,6 +35,8 @@ libsupport-routines = \ + ignore_stderr \ + next_to_fault \ + oom_error \ ++ resolv_response_context_duplicate \ ++ resolv_response_context_free \ + resolv_test \ + set_fortify_handler \ + support-xfstat \ +@@ -133,6 +135,7 @@ libsupport-routines = \ + xpthread_join \ + xpthread_key_create \ + xpthread_key_delete \ ++ xpthread_kill \ + xpthread_mutex_consistent \ + xpthread_mutex_destroy \ + xpthread_mutex_init \ +diff --git a/support/capture_subprocess.h b/support/capture_subprocess.h +index 9808750f80..421f657678 100644 +--- a/support/capture_subprocess.h ++++ b/support/capture_subprocess.h +@@ -41,6 +41,12 @@ struct support_capture_subprocess support_capture_subprocess + struct support_capture_subprocess support_capture_subprogram + (const char *file, char *const argv[]); + ++/* Copy the running program into 
a setgid binary and run it with CHILD_ID ++ argument. If execution is successful, return the exit status of the child ++ program, otherwise return a non-zero failure exit code. */ ++int support_capture_subprogram_self_sgid ++ (char *child_id); ++ + /* Deallocate the subprocess data captured by + support_capture_subprocess. */ + void support_capture_subprocess_free (struct support_capture_subprocess *); +diff --git a/support/resolv_response_context_duplicate.c b/support/resolv_response_context_duplicate.c +new file mode 100644 +index 0000000000..f9c5c3462a +--- /dev/null ++++ b/support/resolv_response_context_duplicate.c +@@ -0,0 +1,37 @@ ++/* Duplicate a response context used in DNS resolver tests. ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <string.h> ++#include <support/resolv_test.h> ++#include <support/support.h> ++ ++struct resolv_response_context * ++resolv_response_context_duplicate (const struct resolv_response_context *ctx) ++{ ++ struct resolv_response_context *result = xmalloc (sizeof (*result)); ++ memcpy (result, ctx, sizeof (*result)); ++ if (result->client_address != NULL) ++ { ++ result->client_address = xmalloc (result->client_address_length); ++ memcpy (result->client_address, ctx->client_address, ++ result->client_address_length); ++ } ++ result->query_buffer = xmalloc (result->query_length); ++ memcpy (result->query_buffer, ctx->query_buffer, result->query_length); ++ return result; ++} +diff --git a/support/resolv_response_context_free.c b/support/resolv_response_context_free.c +new file mode 100644 +index 0000000000..b88c05ffd4 +--- /dev/null ++++ b/support/resolv_response_context_free.c +@@ -0,0 +1,28 @@ ++/* Free a response context used in DNS resolver tests. ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <stdlib.h> ++#include <support/resolv_test.h> ++ ++void ++resolv_response_context_free (struct resolv_response_context *ctx) ++{ ++ free (ctx->query_buffer); ++ free (ctx->client_address); ++ free (ctx); ++} +diff --git a/support/resolv_test.c b/support/resolv_test.c +index 53b7fc41ab..9878a040a3 100644 +--- a/support/resolv_test.c ++++ b/support/resolv_test.c +@@ -181,7 +181,9 @@ resolv_response_init (struct resolv_response_builder *b, + b->buffer[2] |= b->query_buffer[2] & 0x01; /* Copy the RD bit. */ + if (flags.tc) + b->buffer[2] |= 0x02; +- b->buffer[3] = 0x80 | flags.rcode; /* Always set RA. */ ++ b->buffer[3] = flags.rcode; ++ if (!flags.clear_ra) ++ b->buffer[3] |= 0x80; + if (flags.ad) + b->buffer[3] |= 0x20; + +@@ -434,9 +436,9 @@ resolv_response_buffer (const struct resolv_response_builder *b) + return result; + } + +-static struct resolv_response_builder * +-response_builder_allocate +- (const unsigned char *query_buffer, size_t query_length) ++struct resolv_response_builder * ++resolv_response_builder_allocate (const unsigned char *query_buffer, ++ size_t query_length) + { + struct resolv_response_builder *b = xmalloc (sizeof (*b)); + memset (b, 0, offsetof (struct resolv_response_builder, buffer)); +@@ -445,8 +447,8 @@ response_builder_allocate + return b; + } + +-static void +-response_builder_free (struct resolv_response_builder *b) ++void ++resolv_response_builder_free (struct resolv_response_builder *b) + { + tdestroy (b->compression_offsets, free); + free (b); +@@ -661,13 +663,17 @@ server_thread_udp_process_one (struct resolv_test *obj, int server_index) + + struct resolv_response_context ctx = + { ++ .test = obj, ++ .client_address = &peer, ++ .client_address_length = peerlen, + .query_buffer = query, + .query_length = length, + .server_index = server_index, + .tcp = false, + .edns = qinfo.edns, + }; +- struct resolv_response_builder *b = response_builder_allocate (query, length); ++ struct resolv_response_builder *b ++ = 
resolv_response_builder_allocate (query, length); + obj->config.response_callback + (&ctx, b, qinfo.qname, qinfo.qclass, qinfo.qtype); + +@@ -684,7 +690,7 @@ server_thread_udp_process_one (struct resolv_test *obj, int server_index) + if (b->offset >= 12) + printf ("info: UDP server %d: sending response:" + " %zu bytes, RCODE %d (for %s/%u/%u)\n", +- server_index, b->offset, b->buffer[3] & 0x0f, ++ ctx.server_index, b->offset, b->buffer[3] & 0x0f, + qinfo.qname, qinfo.qclass, qinfo.qtype); + else + printf ("info: UDP server %d: sending response: %zu bytes" +@@ -694,23 +700,31 @@ server_thread_udp_process_one (struct resolv_test *obj, int server_index) + if (b->truncate_bytes > 0) + printf ("info: truncated by %u bytes\n", b->truncate_bytes); + } +- size_t to_send = b->offset; +- if (to_send < b->truncate_bytes) +- to_send = 0; +- else +- to_send -= b->truncate_bytes; +- +- /* Ignore most errors here because the other end may have closed +- the socket. */ +- if (sendto (obj->servers[server_index].socket_udp, +- b->buffer, to_send, 0, +- (struct sockaddr *) &peer, peerlen) < 0) +- TEST_VERIFY_EXIT (errno != EBADF); ++ resolv_response_send_udp (&ctx, b); + } +- response_builder_free (b); ++ resolv_response_builder_free (b); + return true; + } + ++void ++resolv_response_send_udp (const struct resolv_response_context *ctx, ++ struct resolv_response_builder *b) ++{ ++ TEST_VERIFY_EXIT (!ctx->tcp); ++ size_t to_send = b->offset; ++ if (to_send < b->truncate_bytes) ++ to_send = 0; ++ else ++ to_send -= b->truncate_bytes; ++ ++ /* Ignore most errors here because the other end may have closed ++ the socket. */ ++ if (sendto (ctx->test->servers[ctx->server_index].socket_udp, ++ b->buffer, to_send, 0, ++ ctx->client_address, ctx->client_address_length) < 0) ++ TEST_VERIFY_EXIT (errno != EBADF); ++} ++ + /* UDP thread_callback function. Variant for one thread per + server. 
*/ + static void +@@ -897,14 +911,15 @@ server_thread_tcp_client (void *arg) + + struct resolv_response_context ctx = + { ++ .test = closure->obj, + .query_buffer = query_buffer, + .query_length = query_length, + .server_index = closure->server_index, + .tcp = true, + .edns = qinfo.edns, + }; +- struct resolv_response_builder *b = response_builder_allocate +- (query_buffer, query_length); ++ struct resolv_response_builder *b ++ = resolv_response_builder_allocate (query_buffer, query_length); + closure->obj->config.response_callback + (&ctx, b, qinfo.qname, qinfo.qclass, qinfo.qtype); + +@@ -936,7 +951,7 @@ server_thread_tcp_client (void *arg) + writev_fully (closure->client_socket, buffers, 2); + } + bool close_flag = b->close; +- response_builder_free (b); ++ resolv_response_builder_free (b); + free (query_buffer); + if (close_flag) + break; +diff --git a/support/resolv_test.h b/support/resolv_test.h +index 67819469a0..31a5c1c3e7 100644 +--- a/support/resolv_test.h ++++ b/support/resolv_test.h +@@ -35,25 +35,36 @@ struct resolv_edns_info + uint16_t payload_size; + }; + ++/* This opaque struct collects information about the resolver testing ++ currently in progress. */ ++struct resolv_test; ++ + /* This struct provides context information when the response callback + specified in struct resolv_redirect_config is invoked. */ + struct resolv_response_context + { +- const unsigned char *query_buffer; ++ struct resolv_test *test; ++ void *client_address; ++ size_t client_address_length; ++ unsigned char *query_buffer; + size_t query_length; + int server_index; + bool tcp; + struct resolv_edns_info edns; + }; + ++/* Produces a deep copy of the context. */ ++struct resolv_response_context * ++ resolv_response_context_duplicate (const struct resolv_response_context *); ++ ++/* Frees the copy. For the context passed to the response function, ++ this happens implicitly. 
*/ ++void resolv_response_context_free (struct resolv_response_context *); ++ + /* This opaque struct is used to construct responses from within the + response callback function. */ + struct resolv_response_builder; + +-/* This opaque struct collects information about the resolver testing +- currently in progress. */ +-struct resolv_test; +- + enum + { + /* Maximum number of test servers supported by the framework. */ +@@ -137,6 +148,10 @@ struct resolv_response_flags + /* If true, the AD (authenticated data) flag will be set. */ + bool ad; + ++ /* If true, do not set the RA (recursion available) flag in the ++ response. */ ++ bool clear_ra; ++ + /* Initial section count values. Can be used to artificially + increase the counts, for malformed packet testing.*/ + unsigned short qdcount; +@@ -188,6 +203,22 @@ void resolv_response_close (struct resolv_response_builder *); + /* The size of the response packet built so far. */ + size_t resolv_response_length (const struct resolv_response_builder *); + ++/* Allocates a response builder tied to a specific query packet, ++ starting at QUERY_BUFFER, containing QUERY_LENGTH bytes. */ ++struct resolv_response_builder * ++ resolv_response_builder_allocate (const unsigned char *query_buffer, ++ size_t query_length); ++ ++/* Deallocates a response buffer. */ ++void resolv_response_builder_free (struct resolv_response_builder *); ++ ++/* Sends a UDP response using a specific context. This can be used to ++ reorder or duplicate responses, along with ++ resolv_response_context_duplicate and ++ response_builder_allocate. 
*/ ++void resolv_response_send_udp (const struct resolv_response_context *, ++ struct resolv_response_builder *); ++ + __END_DECLS + + #endif /* SUPPORT_RESOLV_TEST_H */ +diff --git a/support/subprocess.h b/support/subprocess.h +index 8b442fd5c0..34ffd02e8e 100644 +--- a/support/subprocess.h ++++ b/support/subprocess.h +@@ -38,6 +38,11 @@ struct support_subprocess support_subprocess + struct support_subprocess support_subprogram + (const char *file, char *const argv[]); + ++/* Invoke program FILE with ARGV arguments by using posix_spawn and wait for it ++ to complete. Return program exit status. */ ++int support_subprogram_wait ++ (const char *file, char *const argv[]); ++ + /* Wait for the subprocess indicated by PROC::PID. Return the status + indicate by waitpid call. */ + int support_process_wait (struct support_subprocess *proc); +diff --git a/support/support_capture_subprocess.c b/support/support_capture_subprocess.c +index eeed676e3d..28a37df67f 100644 +--- a/support/support_capture_subprocess.c ++++ b/support/support_capture_subprocess.c +@@ -20,11 +20,14 @@ + #include <support/capture_subprocess.h> + + #include <errno.h> ++#include <fcntl.h> + #include <stdlib.h> + #include <support/check.h> + #include <support/xunistd.h> + #include <support/xsocket.h> + #include <support/xspawn.h> ++#include <support/support.h> ++#include <support/test-driver.h> + + static void + transfer (const char *what, struct pollfd *pfd, struct xmemstream *stream) +@@ -36,7 +39,7 @@ transfer (const char *what, struct pollfd *pfd, struct xmemstream *stream) + if (ret < 0) + { + support_record_failure (); +- printf ("error: reading from subprocess %s: %m", what); ++ printf ("error: reading from subprocess %s: %m\n", what); + pfd->events = 0; + pfd->revents = 0; + } +@@ -102,6 +105,129 @@ support_capture_subprogram (const char *file, char *const argv[]) + return result; + } + ++/* Copies the executable into a restricted directory, so that we can ++ safely make it SGID with the TARGET 
group ID. Then runs the ++ executable. */ ++static int ++copy_and_spawn_sgid (char *child_id, gid_t gid) ++{ ++ char *dirname = xasprintf ("%s/tst-tunables-setuid.%jd", ++ test_dir, (intmax_t) getpid ()); ++ char *execname = xasprintf ("%s/bin", dirname); ++ int infd = -1; ++ int outfd = -1; ++ int ret = 1, status = 1; ++ ++ TEST_VERIFY (mkdir (dirname, 0700) == 0); ++ if (support_record_failure_is_failed ()) ++ goto err; ++ ++ infd = open ("/proc/self/exe", O_RDONLY); ++ if (infd < 0) ++ FAIL_UNSUPPORTED ("unsupported: Cannot read binary from procfs\n"); ++ ++ outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700); ++ TEST_VERIFY (outfd >= 0); ++ if (support_record_failure_is_failed ()) ++ goto err; ++ ++ char buf[4096]; ++ for (;;) ++ { ++ ssize_t rdcount = read (infd, buf, sizeof (buf)); ++ TEST_VERIFY (rdcount >= 0); ++ if (support_record_failure_is_failed ()) ++ goto err; ++ if (rdcount == 0) ++ break; ++ char *p = buf; ++ char *end = buf + rdcount; ++ while (p != end) ++ { ++ ssize_t wrcount = write (outfd, buf, end - p); ++ if (wrcount == 0) ++ errno = ENOSPC; ++ TEST_VERIFY (wrcount > 0); ++ if (support_record_failure_is_failed ()) ++ goto err; ++ p += wrcount; ++ } ++ } ++ TEST_VERIFY (fchown (outfd, getuid (), gid) == 0); ++ if (support_record_failure_is_failed ()) ++ goto err; ++ TEST_VERIFY (fchmod (outfd, 02750) == 0); ++ if (support_record_failure_is_failed ()) ++ goto err; ++ TEST_VERIFY (close (outfd) == 0); ++ if (support_record_failure_is_failed ()) ++ goto err; ++ TEST_VERIFY (close (infd) == 0); ++ if (support_record_failure_is_failed ()) ++ goto err; ++ ++ /* We have the binary, now spawn the subprocess. Avoid using ++ support_subprogram because we only want the program exit status, not the ++ contents. 
*/ ++ ret = 0; ++ ++ char * const args[] = {execname, child_id, NULL}; ++ ++ status = support_subprogram_wait (args[0], args); ++ ++err: ++ if (outfd >= 0) ++ close (outfd); ++ if (infd >= 0) ++ close (infd); ++ if (execname != NULL) ++ { ++ unlink (execname); ++ free (execname); ++ } ++ if (dirname != NULL) ++ { ++ rmdir (dirname); ++ free (dirname); ++ } ++ ++ if (ret != 0) ++ FAIL_EXIT1("Failed to make sgid executable for test\n"); ++ ++ return status; ++} ++ ++int ++support_capture_subprogram_self_sgid (char *child_id) ++{ ++ gid_t target = 0; ++ const int count = 64; ++ gid_t groups[count]; ++ ++ /* Get a GID which is not our current GID, but is present in the ++ supplementary group list. */ ++ int ret = getgroups (count, groups); ++ if (ret < 0) ++ FAIL_UNSUPPORTED("Could not get group list for user %jd\n", ++ (intmax_t) getuid ()); ++ ++ gid_t current = getgid (); ++ for (int i = 0; i < ret; ++i) ++ { ++ if (groups[i] != current) ++ { ++ target = groups[i]; ++ break; ++ } ++ } ++ ++ if (target == 0) ++ FAIL_UNSUPPORTED("Could not find a suitable GID for user %jd\n", ++ (intmax_t) getuid ()); ++ ++ return copy_and_spawn_sgid (child_id, target); ++} ++ + void + support_capture_subprocess_free (struct support_capture_subprocess *p) + { +diff --git a/support/support_subprocess.c b/support/support_subprocess.c +index 36e3a77af2..4a25828111 100644 +--- a/support/support_subprocess.c ++++ b/support/support_subprocess.c +@@ -27,7 +27,7 @@ + #include <support/subprocess.h> + + static struct support_subprocess +-support_suprocess_init (void) ++support_subprocess_init (void) + { + struct support_subprocess result; + +@@ -48,7 +48,7 @@ support_suprocess_init (void) + struct support_subprocess + support_subprocess (void (*callback) (void *), void *closure) + { +- struct support_subprocess result = support_suprocess_init (); ++ struct support_subprocess result = support_subprocess_init (); + + result.pid = xfork (); + if (result.pid == 0) +@@ -71,7 +71,7 @@ 
support_subprocess (void (*callback) (void *), void *closure) + struct support_subprocess + support_subprogram (const char *file, char *const argv[]) + { +- struct support_subprocess result = support_suprocess_init (); ++ struct support_subprocess result = support_subprocess_init (); + + posix_spawn_file_actions_t fa; + /* posix_spawn_file_actions_init does not fail. */ +@@ -84,7 +84,7 @@ support_subprogram (const char *file, char *const argv[]) + xposix_spawn_file_actions_addclose (&fa, result.stdout_pipe[1]); + xposix_spawn_file_actions_addclose (&fa, result.stderr_pipe[1]); + +- result.pid = xposix_spawn (file, &fa, NULL, argv, NULL); ++ result.pid = xposix_spawn (file, &fa, NULL, argv, environ); + + xclose (result.stdout_pipe[1]); + xclose (result.stderr_pipe[1]); +@@ -92,6 +92,19 @@ support_subprogram (const char *file, char *const argv[]) + return result; + } + ++int ++support_subprogram_wait (const char *file, char *const argv[]) ++{ ++ posix_spawn_file_actions_t fa; ++ ++ posix_spawn_file_actions_init (&fa); ++ struct support_subprocess res = support_subprocess_init (); ++ ++ res.pid = xposix_spawn (file, &fa, NULL, argv, environ); ++ ++ return support_process_wait (&res); ++} ++ + int + support_process_wait (struct support_subprocess *proc) + { +diff --git a/support/xpthread_kill.c b/support/xpthread_kill.c +new file mode 100644 +index 0000000000..111a75d85e +--- /dev/null ++++ b/support/xpthread_kill.c +@@ -0,0 +1,26 @@ ++/* pthread_kill with error checking. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <signal.h> ++#include <support/xthread.h> ++ ++void ++xpthread_kill (pthread_t thr, int signo) ++{ ++ xpthread_check_return ("pthread_kill", pthread_kill (thr, signo)); ++} +diff --git a/support/xthread.h b/support/xthread.h +index 05f8d4a7d9..cb1fc30da0 100644 +--- a/support/xthread.h ++++ b/support/xthread.h +@@ -75,6 +75,8 @@ void xpthread_attr_setstacksize (pthread_attr_t *attr, + void xpthread_attr_setguardsize (pthread_attr_t *attr, + size_t guardsize); + ++void xpthread_kill (pthread_t thr, int signo); ++ + /* Set the stack size in ATTR to a small value, but still large enough + to cover most internal glibc stack usage. */ + void support_set_small_thread_stack_size (pthread_attr_t *attr); +diff --git a/sysdeps/aarch64/dl-bti.c b/sysdeps/aarch64/dl-bti.c +index 196e462520..cf7624aaa2 100644 +--- a/sysdeps/aarch64/dl-bti.c ++++ b/sysdeps/aarch64/dl-bti.c +@@ -19,43 +19,76 @@ + #include <errno.h> + #include <libintl.h> + #include <ldsodefs.h> ++#include <sys/mman.h> + +-static int +-enable_bti (struct link_map *map, const char *program) ++/* See elf/dl-load.h. */ ++#ifndef MAP_COPY ++# define MAP_COPY (MAP_PRIVATE | MAP_DENYWRITE) ++#endif ++ ++/* Enable BTI protection for MAP. 
*/ ++ ++void ++_dl_bti_protect (struct link_map *map, int fd) + { ++ const size_t pagesz = GLRO(dl_pagesize); + const ElfW(Phdr) *phdr; +- unsigned prot; + + for (phdr = map->l_phdr; phdr < &map->l_phdr[map->l_phnum]; ++phdr) + if (phdr->p_type == PT_LOAD && (phdr->p_flags & PF_X)) + { +- void *start = (void *) (phdr->p_vaddr + map->l_addr); +- size_t len = phdr->p_memsz; ++ size_t vstart = ALIGN_DOWN (phdr->p_vaddr, pagesz); ++ size_t vend = ALIGN_UP (phdr->p_vaddr + phdr->p_filesz, pagesz); ++ off_t off = ALIGN_DOWN (phdr->p_offset, pagesz); ++ void *start = (void *) (vstart + map->l_addr); ++ size_t len = vend - vstart; + +- prot = PROT_EXEC | PROT_BTI; ++ unsigned prot = PROT_EXEC | PROT_BTI; + if (phdr->p_flags & PF_R) + prot |= PROT_READ; + if (phdr->p_flags & PF_W) + prot |= PROT_WRITE; + +- if (__mprotect (start, len, prot) < 0) +- { +- if (program) +- _dl_fatal_printf ("%s: mprotect failed to turn on BTI\n", +- map->l_name); +- else +- _dl_signal_error (errno, map->l_name, "dlopen", +- N_("mprotect failed to turn on BTI")); +- } ++ if (fd == -1) ++ /* Ignore failures for kernel mapped binaries. */ ++ __mprotect (start, len, prot); ++ else ++ map->l_mach.bti_fail = __mmap (start, len, prot, ++ MAP_FIXED|MAP_COPY|MAP_FILE, ++ fd, off) == MAP_FAILED; + } +- return 0; + } + +-/* Enable BTI for L if required. */ ++ ++static void ++bti_failed (struct link_map *l, const char *program) ++{ ++ if (program) ++ _dl_fatal_printf ("%s: %s: failed to turn on BTI protection\n", ++ program, l->l_name); ++ else ++ /* Note: the errno value is not available any more. */ ++ _dl_signal_error (0, l->l_name, "dlopen", ++ N_("failed to turn on BTI protection")); ++} ++ ++ ++/* Enable BTI for L and its dependencies. 
*/ + + void + _dl_bti_check (struct link_map *l, const char *program) + { +- if (GLRO(dl_aarch64_cpu_features).bti && l->l_mach.bti) +- enable_bti (l, program); ++ if (!GLRO(dl_aarch64_cpu_features).bti) ++ return; ++ ++ if (l->l_mach.bti_fail) ++ bti_failed (l, program); ++ ++ unsigned int i = l->l_searchlist.r_nlist; ++ while (i-- > 0) ++ { ++ struct link_map *dep = l->l_initfini[i]; ++ if (dep->l_mach.bti_fail) ++ bti_failed (dep, program); ++ } + } +diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h +index 70b9ed3925..fde7cfd9e2 100644 +--- a/sysdeps/aarch64/dl-machine.h ++++ b/sysdeps/aarch64/dl-machine.h +@@ -395,13 +395,6 @@ elf_machine_lazy_rel (struct link_map *map, + /* Check for unexpected PLT reloc type. */ + if (__builtin_expect (r_type == AARCH64_R(JUMP_SLOT), 1)) + { +- if (map->l_mach.plt == 0) +- { +- /* Prelinking. */ +- *reloc_addr += l_addr; +- return; +- } +- + if (__glibc_unlikely (map->l_info[DT_AARCH64 (VARIANT_PCS)] != NULL)) + { + /* Check the symbol table for variant PCS symbols. 
*/ +@@ -425,7 +418,10 @@ elf_machine_lazy_rel (struct link_map *map, + } + } + +- *reloc_addr = map->l_mach.plt; ++ if (map->l_mach.plt == 0) ++ *reloc_addr += l_addr; ++ else ++ *reloc_addr = map->l_mach.plt; + } + else if (__builtin_expect (r_type == AARCH64_R(TLSDESC), 1)) + { +diff --git a/sysdeps/aarch64/dl-prop.h b/sysdeps/aarch64/dl-prop.h +index b0785bda83..e926e54984 100644 +--- a/sysdeps/aarch64/dl-prop.h ++++ b/sysdeps/aarch64/dl-prop.h +@@ -19,6 +19,8 @@ + #ifndef _DL_PROP_H + #define _DL_PROP_H + ++extern void _dl_bti_protect (struct link_map *, int) attribute_hidden; ++ + extern void _dl_bti_check (struct link_map *, const char *) + attribute_hidden; + +@@ -35,14 +37,18 @@ _dl_open_check (struct link_map *m) + } + + static inline void __attribute__ ((always_inline)) +-_dl_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph) ++_dl_process_pt_note (struct link_map *l, int fd, const ElfW(Phdr) *ph) + { + } + + static inline int +-_dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz, +- void *data) ++_dl_process_gnu_property (struct link_map *l, int fd, uint32_t type, ++ uint32_t datasz, void *data) + { ++ if (!GLRO(dl_aarch64_cpu_features).bti) ++ /* Skip note processing. */ ++ return 0; ++ + if (type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) + { + /* Stop if the property note is ill-formed. */ +@@ -51,7 +57,7 @@ _dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz, + + unsigned int feature_1 = *(unsigned int *) data; + if (feature_1 & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) +- l->l_mach.bti = true; ++ _dl_bti_protect (l, fd); + + /* Stop if we processed the property note. */ + return 0; +diff --git a/sysdeps/aarch64/linkmap.h b/sysdeps/aarch64/linkmap.h +index 847a03ace2..b3f7663b07 100644 +--- a/sysdeps/aarch64/linkmap.h ++++ b/sysdeps/aarch64/linkmap.h +@@ -22,5 +22,5 @@ struct link_map_machine + { + ElfW(Addr) plt; /* Address of .plt */ + void *tlsdesc_table; /* Address of TLS descriptor hash table. 
*/ +- bool bti; /* Branch Target Identification is enabled. */ ++ bool bti_fail; /* Failed to enable Branch Target Identification. */ + }; +diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c +index 7cf5f033e8..799d60c98c 100644 +--- a/sysdeps/aarch64/multiarch/memcpy.c ++++ b/sysdeps/aarch64/multiarch/memcpy.c +@@ -41,7 +41,8 @@ libc_ifunc (__libc_memcpy, + ? __memcpy_falkor + : (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr) + ? __memcpy_thunderx2 +- : (IS_NEOVERSE_N1 (midr) ++ : (IS_NEOVERSE_N1 (midr) || IS_NEOVERSE_N2 (midr) ++ || IS_NEOVERSE_V1 (midr) + ? __memcpy_simd + : __memcpy_generic))))); + +diff --git a/sysdeps/aarch64/multiarch/memcpy_advsimd.S b/sysdeps/aarch64/multiarch/memcpy_advsimd.S +index d4ba747777..48bb6d7ca4 100644 +--- a/sysdeps/aarch64/multiarch/memcpy_advsimd.S ++++ b/sysdeps/aarch64/multiarch/memcpy_advsimd.S +@@ -223,12 +223,13 @@ L(copy_long_backwards): + b.ls L(copy64_from_start) + + L(loop64_backwards): +- stp A_q, B_q, [dstend, -32] ++ str B_q, [dstend, -16] ++ str A_q, [dstend, -32] + ldp A_q, B_q, [srcend, -96] +- stp C_q, D_q, [dstend, -64] ++ str D_q, [dstend, -48] ++ str C_q, [dstend, -64]! + ldp C_q, D_q, [srcend, -128] + sub srcend, srcend, 64 +- sub dstend, dstend, 64 + subs count, count, 64 + b.hi L(loop64_backwards) + +diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c +index ad10aa8ac6..46a4cb3a54 100644 +--- a/sysdeps/aarch64/multiarch/memmove.c ++++ b/sysdeps/aarch64/multiarch/memmove.c +@@ -41,7 +41,8 @@ libc_ifunc (__libc_memmove, + ? __memmove_falkor + : (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr) + ? __memmove_thunderx2 +- : (IS_NEOVERSE_N1 (midr) ++ : (IS_NEOVERSE_N1 (midr) || IS_NEOVERSE_N2 (midr) ++ || IS_NEOVERSE_V1 (midr) + ? 
__memmove_simd + : __memmove_generic))))); + +diff --git a/sysdeps/aarch64/start.S b/sysdeps/aarch64/start.S +index 75393e1c18..1998ea95d4 100644 +--- a/sysdeps/aarch64/start.S ++++ b/sysdeps/aarch64/start.S +@@ -43,11 +43,9 @@ + */ + + .text +- .globl _start +- .type _start,#function +-_start: +- BTI_C ++ENTRY(_start) + /* Create an initial frame with 0 LR and FP */ ++ cfi_undefined (x30) + mov x29, #0 + mov x30, #0 + +@@ -101,8 +99,10 @@ _start: + because crt1.o and rcrt1.o share code and the later must avoid the + use of GOT relocations before __libc_start_main is called. */ + __wrap_main: ++ BTI_C + b main + #endif ++END(_start) + + /* Define a symbol for the first piece of initialized data. */ + .data +diff --git a/sysdeps/generic/dl-prop.h b/sysdeps/generic/dl-prop.h +index f1cf576fe3..df27ff8e6a 100644 +--- a/sysdeps/generic/dl-prop.h ++++ b/sysdeps/generic/dl-prop.h +@@ -37,15 +37,15 @@ _dl_open_check (struct link_map *m) + } + + static inline void __attribute__ ((always_inline)) +-_dl_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph) ++_dl_process_pt_note (struct link_map *l, int fd, const ElfW(Phdr) *ph) + { + } + + /* Called for each property in the NT_GNU_PROPERTY_TYPE_0 note of L, + processing of the properties continues until this returns 0. */ + static inline int __attribute__ ((always_inline)) +-_dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz, +- void *data) ++_dl_process_gnu_property (struct link_map *l, int fd, uint32_t type, ++ uint32_t datasz, void *data) + { + return 0; + } +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index ba114ab4b1..62ac40d81b 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -919,8 +919,9 @@ extern void _dl_rtld_di_serinfo (struct link_map *loader, + Dl_serinfo *si, bool counting); + + /* Process PT_GNU_PROPERTY program header PH in module L after +- PT_LOAD segments are mapped. 
*/ +-void _dl_process_pt_gnu_property (struct link_map *l, const ElfW(Phdr) *ph); ++ PT_LOAD segments are mapped from file FD. */ ++void _dl_process_pt_gnu_property (struct link_map *l, int fd, ++ const ElfW(Phdr) *ph); + + + /* Search loaded objects' symbol tables for a definition of the symbol +diff --git a/sysdeps/generic/unwind.h b/sysdeps/generic/unwind.h +index b667a5b652..c229603af3 100644 +--- a/sysdeps/generic/unwind.h ++++ b/sysdeps/generic/unwind.h +@@ -75,15 +75,21 @@ typedef void (*_Unwind_Exception_Cleanup_Fn) (_Unwind_Reason_Code, + + struct _Unwind_Exception + { +- _Unwind_Exception_Class exception_class; +- _Unwind_Exception_Cleanup_Fn exception_cleanup; +- _Unwind_Word private_1; +- _Unwind_Word private_2; +- +- /* @@@ The IA-64 ABI says that this structure must be double-word aligned. +- Taking that literally does not make much sense generically. Instead we +- provide the maximum alignment required by any type for the machine. */ +-} __attribute__((__aligned__)); ++ union ++ { ++ struct ++ { ++ _Unwind_Exception_Class exception_class; ++ _Unwind_Exception_Cleanup_Fn exception_cleanup; ++ _Unwind_Word private_1; ++ _Unwind_Word private_2; ++ }; ++ ++ /* The IA-64 ABI says that this structure must be double-word aligned. */ ++ _Unwind_Word unwind_exception_align[2] ++ __attribute__ ((__aligned__ (2 * sizeof (_Unwind_Word)))); ++ }; ++}; + + + /* The ACTIONS argument to the personality routine is a bitwise OR of one +diff --git a/sysdeps/gnu/errlist.h b/sysdeps/gnu/errlist.h +index 5d11ed723d..6329e5f393 100644 +--- a/sysdeps/gnu/errlist.h ++++ b/sysdeps/gnu/errlist.h +@@ -1,24 +1,21 @@ +-#ifndef ERR_MAP +-#define ERR_MAP(value) value +-#endif +-_S(ERR_MAP(0), N_("Success")) ++_S(0, N_("Success")) + #ifdef EPERM + /* + TRANS Only the owner of the file (or other resource) + TRANS or processes with special privileges can perform the operation. 
*/ +-_S(ERR_MAP(EPERM), N_("Operation not permitted")) ++_S(EPERM, N_("Operation not permitted")) + #endif + #ifdef ENOENT + /* + TRANS This is a ``file doesn't exist'' error + TRANS for ordinary files that are referenced in contexts where they are + TRANS expected to already exist. */ +-_S(ERR_MAP(ENOENT), N_("No such file or directory")) ++_S(ENOENT, N_("No such file or directory")) + #endif + #ifdef ESRCH + /* + TRANS No process matches the specified process ID. */ +-_S(ERR_MAP(ESRCH), N_("No such process")) ++_S(ESRCH, N_("No such process")) + #endif + #ifdef EINTR + /* +@@ -29,12 +26,12 @@ TRANS + TRANS You can choose to have functions resume after a signal that is handled, + TRANS rather than failing with @code{EINTR}; see @ref{Interrupted + TRANS Primitives}. */ +-_S(ERR_MAP(EINTR), N_("Interrupted system call")) ++_S(EINTR, N_("Interrupted system call")) + #endif + #ifdef EIO + /* + TRANS Usually used for physical read or write errors. */ +-_S(ERR_MAP(EIO), N_("Input/output error")) ++_S(EIO, N_("Input/output error")) + #endif + #ifdef ENXIO + /* +@@ -43,7 +40,7 @@ TRANS represented by a file you specified, and it couldn't find the device. + TRANS This can mean that the device file was installed incorrectly, or that + TRANS the physical device is missing or not correctly attached to the + TRANS computer. */ +-_S(ERR_MAP(ENXIO), N_("No such device or address")) ++_S(ENXIO, N_("No such device or address")) + #endif + #ifdef E2BIG + /* +@@ -51,27 +48,27 @@ TRANS Used when the arguments passed to a new program + TRANS being executed with one of the @code{exec} functions (@pxref{Executing a + TRANS File}) occupy too much memory space. This condition never arises on + TRANS @gnuhurdsystems{}. */ +-_S(ERR_MAP(E2BIG), N_("Argument list too long")) ++_S(E2BIG, N_("Argument list too long")) + #endif + #ifdef ENOEXEC + /* + TRANS Invalid executable file format. This condition is detected by the + TRANS @code{exec} functions; see @ref{Executing a File}. 
*/ +-_S(ERR_MAP(ENOEXEC), N_("Exec format error")) ++_S(ENOEXEC, N_("Exec format error")) + #endif + #ifdef EBADF + /* + TRANS For example, I/O on a descriptor that has been + TRANS closed or reading from a descriptor open only for writing (or vice + TRANS versa). */ +-_S(ERR_MAP(EBADF), N_("Bad file descriptor")) ++_S(EBADF, N_("Bad file descriptor")) + #endif + #ifdef ECHILD + /* + TRANS This error happens on operations that are + TRANS supposed to manipulate child processes, when there aren't any processes + TRANS to manipulate. */ +-_S(ERR_MAP(ECHILD), N_("No child processes")) ++_S(ECHILD, N_("No child processes")) + #endif + #ifdef EDEADLK + /* +@@ -79,74 +76,74 @@ TRANS Allocating a system resource would have resulted in a + TRANS deadlock situation. The system does not guarantee that it will notice + TRANS all such situations. This error means you got lucky and the system + TRANS noticed; it might just hang. @xref{File Locks}, for an example. */ +-_S(ERR_MAP(EDEADLK), N_("Resource deadlock avoided")) ++_S(EDEADLK, N_("Resource deadlock avoided")) + #endif + #ifdef ENOMEM + /* + TRANS The system cannot allocate more virtual memory + TRANS because its capacity is full. */ +-_S(ERR_MAP(ENOMEM), N_("Cannot allocate memory")) ++_S(ENOMEM, N_("Cannot allocate memory")) + #endif + #ifdef EACCES + /* + TRANS The file permissions do not allow the attempted operation. */ +-_S(ERR_MAP(EACCES), N_("Permission denied")) ++_S(EACCES, N_("Permission denied")) + #endif + #ifdef EFAULT + /* + TRANS An invalid pointer was detected. + TRANS On @gnuhurdsystems{}, this error never happens; you get a signal instead. */ +-_S(ERR_MAP(EFAULT), N_("Bad address")) ++_S(EFAULT, N_("Bad address")) + #endif + #ifdef ENOTBLK + /* + TRANS A file that isn't a block special file was given in a situation that + TRANS requires one. For example, trying to mount an ordinary file as a file + TRANS system in Unix gives this error. 
*/ +-_S(ERR_MAP(ENOTBLK), N_("Block device required")) ++_S(ENOTBLK, N_("Block device required")) + #endif + #ifdef EBUSY + /* + TRANS A system resource that can't be shared is already in use. + TRANS For example, if you try to delete a file that is the root of a currently + TRANS mounted filesystem, you get this error. */ +-_S(ERR_MAP(EBUSY), N_("Device or resource busy")) ++_S(EBUSY, N_("Device or resource busy")) + #endif + #ifdef EEXIST + /* + TRANS An existing file was specified in a context where it only + TRANS makes sense to specify a new file. */ +-_S(ERR_MAP(EEXIST), N_("File exists")) ++_S(EEXIST, N_("File exists")) + #endif + #ifdef EXDEV + /* + TRANS An attempt to make an improper link across file systems was detected. + TRANS This happens not only when you use @code{link} (@pxref{Hard Links}) but + TRANS also when you rename a file with @code{rename} (@pxref{Renaming Files}). */ +-_S(ERR_MAP(EXDEV), N_("Invalid cross-device link")) ++_S(EXDEV, N_("Invalid cross-device link")) + #endif + #ifdef ENODEV + /* + TRANS The wrong type of device was given to a function that expects a + TRANS particular sort of device. */ +-_S(ERR_MAP(ENODEV), N_("No such device")) ++_S(ENODEV, N_("No such device")) + #endif + #ifdef ENOTDIR + /* + TRANS A file that isn't a directory was specified when a directory is required. */ +-_S(ERR_MAP(ENOTDIR), N_("Not a directory")) ++_S(ENOTDIR, N_("Not a directory")) + #endif + #ifdef EISDIR + /* + TRANS You cannot open a directory for writing, + TRANS or create or remove hard links to it. */ +-_S(ERR_MAP(EISDIR), N_("Is a directory")) ++_S(EISDIR, N_("Is a directory")) + #endif + #ifdef EINVAL + /* + TRANS This is used to indicate various kinds of problems + TRANS with passing the wrong argument to a library function. 
*/ +-_S(ERR_MAP(EINVAL), N_("Invalid argument")) ++_S(EINVAL, N_("Invalid argument")) + #endif + #ifdef EMFILE + /* +@@ -157,20 +154,20 @@ TRANS In BSD and GNU, the number of open files is controlled by a resource + TRANS limit that can usually be increased. If you get this error, you might + TRANS want to increase the @code{RLIMIT_NOFILE} limit or make it unlimited; + TRANS @pxref{Limits on Resources}. */ +-_S(ERR_MAP(EMFILE), N_("Too many open files")) ++_S(EMFILE, N_("Too many open files")) + #endif + #ifdef ENFILE + /* + TRANS There are too many distinct file openings in the entire system. Note + TRANS that any number of linked channels count as just one file opening; see + TRANS @ref{Linked Channels}. This error never occurs on @gnuhurdsystems{}. */ +-_S(ERR_MAP(ENFILE), N_("Too many open files in system")) ++_S(ENFILE, N_("Too many open files in system")) + #endif + #ifdef ENOTTY + /* + TRANS Inappropriate I/O control operation, such as trying to set terminal + TRANS modes on an ordinary file. */ +-_S(ERR_MAP(ENOTTY), N_("Inappropriate ioctl for device")) ++_S(ENOTTY, N_("Inappropriate ioctl for device")) + #endif + #ifdef ETXTBSY + /* +@@ -179,35 +176,35 @@ TRANS write to a file that is currently being executed. Often using a + TRANS debugger to run a program is considered having it open for writing and + TRANS will cause this error. (The name stands for ``text file busy''.) This + TRANS is not an error on @gnuhurdsystems{}; the text is copied as necessary. */ +-_S(ERR_MAP(ETXTBSY), N_("Text file busy")) ++_S(ETXTBSY, N_("Text file busy")) + #endif + #ifdef EFBIG + /* + TRANS The size of a file would be larger than allowed by the system. */ +-_S(ERR_MAP(EFBIG), N_("File too large")) ++_S(EFBIG, N_("File too large")) + #endif + #ifdef ENOSPC + /* + TRANS Write operation on a file failed because the + TRANS disk is full. 
*/ +-_S(ERR_MAP(ENOSPC), N_("No space left on device")) ++_S(ENOSPC, N_("No space left on device")) + #endif + #ifdef ESPIPE + /* + TRANS Invalid seek operation (such as on a pipe). */ +-_S(ERR_MAP(ESPIPE), N_("Illegal seek")) ++_S(ESPIPE, N_("Illegal seek")) + #endif + #ifdef EROFS + /* + TRANS An attempt was made to modify something on a read-only file system. */ +-_S(ERR_MAP(EROFS), N_("Read-only file system")) ++_S(EROFS, N_("Read-only file system")) + #endif + #ifdef EMLINK + /* + TRANS The link count of a single file would become too large. + TRANS @code{rename} can cause this error if the file being renamed already has + TRANS as many links as it can take (@pxref{Renaming Files}). */ +-_S(ERR_MAP(EMLINK), N_("Too many links")) ++_S(EMLINK, N_("Too many links")) + #endif + #ifdef EPIPE + /* +@@ -216,19 +213,19 @@ TRANS Every library function that returns this error code also generates a + TRANS @code{SIGPIPE} signal; this signal terminates the program if not handled + TRANS or blocked. Thus, your program will never actually see @code{EPIPE} + TRANS unless it has handled or blocked @code{SIGPIPE}. */ +-_S(ERR_MAP(EPIPE), N_("Broken pipe")) ++_S(EPIPE, N_("Broken pipe")) + #endif + #ifdef EDOM + /* + TRANS Used by mathematical functions when an argument value does + TRANS not fall into the domain over which the function is defined. */ +-_S(ERR_MAP(EDOM), N_("Numerical argument out of domain")) ++_S(EDOM, N_("Numerical argument out of domain")) + #endif + #ifdef ERANGE + /* + TRANS Used by mathematical functions when the result value is + TRANS not representable because of overflow or underflow. */ +-_S(ERR_MAP(ERANGE), N_("Numerical result out of range")) ++_S(ERANGE, N_("Numerical result out of range")) + #endif + #ifdef EAGAIN + /* +@@ -261,7 +258,7 @@ TRANS Such shortages are usually fairly serious and affect the whole system, + TRANS so usually an interactive program should report the error to the user + TRANS and return to its command loop. 
+ TRANS @end itemize */ +-_S(ERR_MAP(EAGAIN), N_("Resource temporarily unavailable")) ++_S(EAGAIN, N_("Resource temporarily unavailable")) + #endif + #ifdef EINPROGRESS + /* +@@ -273,47 +270,47 @@ TRANS the operation has begun and will take some time. Attempts to manipulate + TRANS the object before the call completes return @code{EALREADY}. You can + TRANS use the @code{select} function to find out when the pending operation + TRANS has completed; @pxref{Waiting for I/O}. */ +-_S(ERR_MAP(EINPROGRESS), N_("Operation now in progress")) ++_S(EINPROGRESS, N_("Operation now in progress")) + #endif + #ifdef EALREADY + /* + TRANS An operation is already in progress on an object that has non-blocking + TRANS mode selected. */ +-_S(ERR_MAP(EALREADY), N_("Operation already in progress")) ++_S(EALREADY, N_("Operation already in progress")) + #endif + #ifdef ENOTSOCK + /* + TRANS A file that isn't a socket was specified when a socket is required. */ +-_S(ERR_MAP(ENOTSOCK), N_("Socket operation on non-socket")) ++_S(ENOTSOCK, N_("Socket operation on non-socket")) + #endif + #ifdef EMSGSIZE + /* + TRANS The size of a message sent on a socket was larger than the supported + TRANS maximum size. */ +-_S(ERR_MAP(EMSGSIZE), N_("Message too long")) ++_S(EMSGSIZE, N_("Message too long")) + #endif + #ifdef EPROTOTYPE + /* + TRANS The socket type does not support the requested communications protocol. */ +-_S(ERR_MAP(EPROTOTYPE), N_("Protocol wrong type for socket")) ++_S(EPROTOTYPE, N_("Protocol wrong type for socket")) + #endif + #ifdef ENOPROTOOPT + /* + TRANS You specified a socket option that doesn't make sense for the + TRANS particular protocol being used by the socket. @xref{Socket Options}. 
*/ +-_S(ERR_MAP(ENOPROTOOPT), N_("Protocol not available")) ++_S(ENOPROTOOPT, N_("Protocol not available")) + #endif + #ifdef EPROTONOSUPPORT + /* + TRANS The socket domain does not support the requested communications protocol + TRANS (perhaps because the requested protocol is completely invalid). + TRANS @xref{Creating a Socket}. */ +-_S(ERR_MAP(EPROTONOSUPPORT), N_("Protocol not supported")) ++_S(EPROTONOSUPPORT, N_("Protocol not supported")) + #endif + #ifdef ESOCKTNOSUPPORT + /* + TRANS The socket type is not supported. */ +-_S(ERR_MAP(ESOCKTNOSUPPORT), N_("Socket type not supported")) ++_S(ESOCKTNOSUPPORT, N_("Socket type not supported")) + #endif + #ifdef EOPNOTSUPP + /* +@@ -323,71 +320,71 @@ TRANS implemented for all communications protocols. On @gnuhurdsystems{}, this + TRANS error can happen for many calls when the object does not support the + TRANS particular operation; it is a generic indication that the server knows + TRANS nothing to do for that call. */ +-_S(ERR_MAP(EOPNOTSUPP), N_("Operation not supported")) ++_S(EOPNOTSUPP, N_("Operation not supported")) + #endif + #ifdef EPFNOSUPPORT + /* + TRANS The socket communications protocol family you requested is not supported. */ +-_S(ERR_MAP(EPFNOSUPPORT), N_("Protocol family not supported")) ++_S(EPFNOSUPPORT, N_("Protocol family not supported")) + #endif + #ifdef EAFNOSUPPORT + /* + TRANS The address family specified for a socket is not supported; it is + TRANS inconsistent with the protocol being used on the socket. @xref{Sockets}. */ +-_S(ERR_MAP(EAFNOSUPPORT), N_("Address family not supported by protocol")) ++_S(EAFNOSUPPORT, N_("Address family not supported by protocol")) + #endif + #ifdef EADDRINUSE + /* + TRANS The requested socket address is already in use. @xref{Socket Addresses}. 
*/ +-_S(ERR_MAP(EADDRINUSE), N_("Address already in use")) ++_S(EADDRINUSE, N_("Address already in use")) + #endif + #ifdef EADDRNOTAVAIL + /* + TRANS The requested socket address is not available; for example, you tried + TRANS to give a socket a name that doesn't match the local host name. + TRANS @xref{Socket Addresses}. */ +-_S(ERR_MAP(EADDRNOTAVAIL), N_("Cannot assign requested address")) ++_S(EADDRNOTAVAIL, N_("Cannot assign requested address")) + #endif + #ifdef ENETDOWN + /* + TRANS A socket operation failed because the network was down. */ +-_S(ERR_MAP(ENETDOWN), N_("Network is down")) ++_S(ENETDOWN, N_("Network is down")) + #endif + #ifdef ENETUNREACH + /* + TRANS A socket operation failed because the subnet containing the remote host + TRANS was unreachable. */ +-_S(ERR_MAP(ENETUNREACH), N_("Network is unreachable")) ++_S(ENETUNREACH, N_("Network is unreachable")) + #endif + #ifdef ENETRESET + /* + TRANS A network connection was reset because the remote host crashed. */ +-_S(ERR_MAP(ENETRESET), N_("Network dropped connection on reset")) ++_S(ENETRESET, N_("Network dropped connection on reset")) + #endif + #ifdef ECONNABORTED + /* + TRANS A network connection was aborted locally. */ +-_S(ERR_MAP(ECONNABORTED), N_("Software caused connection abort")) ++_S(ECONNABORTED, N_("Software caused connection abort")) + #endif + #ifdef ECONNRESET + /* + TRANS A network connection was closed for reasons outside the control of the + TRANS local host, such as by the remote machine rebooting or an unrecoverable + TRANS protocol violation. */ +-_S(ERR_MAP(ECONNRESET), N_("Connection reset by peer")) ++_S(ECONNRESET, N_("Connection reset by peer")) + #endif + #ifdef ENOBUFS + /* + TRANS The kernel's buffers for I/O operations are all in use. In GNU, this + TRANS error is always synonymous with @code{ENOMEM}; you may get one or the + TRANS other from network operations. 
*/ +-_S(ERR_MAP(ENOBUFS), N_("No buffer space available")) ++_S(ENOBUFS, N_("No buffer space available")) + #endif + #ifdef EISCONN + /* + TRANS You tried to connect a socket that is already connected. + TRANS @xref{Connecting}. */ +-_S(ERR_MAP(EISCONN), N_("Transport endpoint is already connected")) ++_S(EISCONN, N_("Transport endpoint is already connected")) + #endif + #ifdef ENOTCONN + /* +@@ -395,74 +392,74 @@ TRANS The socket is not connected to anything. You get this error when you + TRANS try to transmit data over a socket, without first specifying a + TRANS destination for the data. For a connectionless socket (for datagram + TRANS protocols, such as UDP), you get @code{EDESTADDRREQ} instead. */ +-_S(ERR_MAP(ENOTCONN), N_("Transport endpoint is not connected")) ++_S(ENOTCONN, N_("Transport endpoint is not connected")) + #endif + #ifdef EDESTADDRREQ + /* + TRANS No default destination address was set for the socket. You get this + TRANS error when you try to transmit data over a connectionless socket, + TRANS without first specifying a destination for the data with @code{connect}. */ +-_S(ERR_MAP(EDESTADDRREQ), N_("Destination address required")) ++_S(EDESTADDRREQ, N_("Destination address required")) + #endif + #ifdef ESHUTDOWN + /* + TRANS The socket has already been shut down. */ +-_S(ERR_MAP(ESHUTDOWN), N_("Cannot send after transport endpoint shutdown")) ++_S(ESHUTDOWN, N_("Cannot send after transport endpoint shutdown")) + #endif + #ifdef ETOOMANYREFS +-_S(ERR_MAP(ETOOMANYREFS), N_("Too many references: cannot splice")) ++_S(ETOOMANYREFS, N_("Too many references: cannot splice")) + #endif + #ifdef ETIMEDOUT + /* + TRANS A socket operation with a specified timeout received no response during + TRANS the timeout period. 
*/ +-_S(ERR_MAP(ETIMEDOUT), N_("Connection timed out")) ++_S(ETIMEDOUT, N_("Connection timed out")) + #endif + #ifdef ECONNREFUSED + /* + TRANS A remote host refused to allow the network connection (typically because + TRANS it is not running the requested service). */ +-_S(ERR_MAP(ECONNREFUSED), N_("Connection refused")) ++_S(ECONNREFUSED, N_("Connection refused")) + #endif + #ifdef ELOOP + /* + TRANS Too many levels of symbolic links were encountered in looking up a file name. + TRANS This often indicates a cycle of symbolic links. */ +-_S(ERR_MAP(ELOOP), N_("Too many levels of symbolic links")) ++_S(ELOOP, N_("Too many levels of symbolic links")) + #endif + #ifdef ENAMETOOLONG + /* + TRANS Filename too long (longer than @code{PATH_MAX}; @pxref{Limits for + TRANS Files}) or host name too long (in @code{gethostname} or + TRANS @code{sethostname}; @pxref{Host Identification}). */ +-_S(ERR_MAP(ENAMETOOLONG), N_("File name too long")) ++_S(ENAMETOOLONG, N_("File name too long")) + #endif + #ifdef EHOSTDOWN + /* + TRANS The remote host for a requested network connection is down. */ +-_S(ERR_MAP(EHOSTDOWN), N_("Host is down")) ++_S(EHOSTDOWN, N_("Host is down")) + #endif + /* + TRANS The remote host for a requested network connection is not reachable. */ + #ifdef EHOSTUNREACH +-_S(ERR_MAP(EHOSTUNREACH), N_("No route to host")) ++_S(EHOSTUNREACH, N_("No route to host")) + #endif + #ifdef ENOTEMPTY + /* + TRANS Directory not empty, where an empty directory was expected. Typically, + TRANS this error occurs when you are trying to delete a directory. */ +-_S(ERR_MAP(ENOTEMPTY), N_("Directory not empty")) ++_S(ENOTEMPTY, N_("Directory not empty")) + #endif + #ifdef EUSERS + /* + TRANS The file quota system is confused because there are too many users. + TRANS @c This can probably happen in a GNU system when using NFS. */ +-_S(ERR_MAP(EUSERS), N_("Too many users")) ++_S(EUSERS, N_("Too many users")) + #endif + #ifdef EDQUOT + /* + TRANS The user's disk quota was exceeded. 
*/ +-_S(ERR_MAP(EDQUOT), N_("Disk quota exceeded")) ++_S(EDQUOT, N_("Disk quota exceeded")) + #endif + #ifdef ESTALE + /* +@@ -471,7 +468,7 @@ TRANS file system which is due to file system rearrangements on the server host + TRANS for NFS file systems or corruption in other file systems. + TRANS Repairing this condition usually requires unmounting, possibly repairing + TRANS and remounting the file system. */ +-_S(ERR_MAP(ESTALE), N_("Stale file handle")) ++_S(ESTALE, N_("Stale file handle")) + #endif + #ifdef EREMOTE + /* +@@ -479,7 +476,7 @@ TRANS An attempt was made to NFS-mount a remote file system with a file name tha + TRANS already specifies an NFS-mounted file. + TRANS (This is an error on some operating systems, but we expect it to work + TRANS properly on @gnuhurdsystems{}, making this error code impossible.) */ +-_S(ERR_MAP(EREMOTE), N_("Object is remote")) ++_S(EREMOTE, N_("Object is remote")) + #endif + #ifdef ENOLCK + /* +@@ -487,7 +484,7 @@ TRANS This is used by the file locking facilities; see + TRANS @ref{File Locks}. This error is never generated by @gnuhurdsystems{}, but + TRANS it can result from an operation to an NFS server running another + TRANS operating system. */ +-_S(ERR_MAP(ENOLCK), N_("No locks available")) ++_S(ENOLCK, N_("No locks available")) + #endif + #ifdef ENOSYS + /* +@@ -496,46 +493,46 @@ TRANS not implemented at all, either in the C library itself or in the + TRANS operating system. When you get this error, you can be sure that this + TRANS particular function will always fail with @code{ENOSYS} unless you + TRANS install a new version of the C library or the operating system. */ +-_S(ERR_MAP(ENOSYS), N_("Function not implemented")) ++_S(ENOSYS, N_("Function not implemented")) + #endif + #ifdef EILSEQ + /* + TRANS While decoding a multibyte character the function came along an invalid + TRANS or an incomplete sequence of bytes or the given wide character is invalid. 
*/ +-_S(ERR_MAP(EILSEQ), N_("Invalid or incomplete multibyte or wide character")) ++_S(EILSEQ, N_("Invalid or incomplete multibyte or wide character")) + #endif + #ifdef EBADMSG +-_S(ERR_MAP(EBADMSG), N_("Bad message")) ++_S(EBADMSG, N_("Bad message")) + #endif + #ifdef EIDRM +-_S(ERR_MAP(EIDRM), N_("Identifier removed")) ++_S(EIDRM, N_("Identifier removed")) + #endif + #ifdef EMULTIHOP +-_S(ERR_MAP(EMULTIHOP), N_("Multihop attempted")) ++_S(EMULTIHOP, N_("Multihop attempted")) + #endif + #ifdef ENODATA +-_S(ERR_MAP(ENODATA), N_("No data available")) ++_S(ENODATA, N_("No data available")) + #endif + #ifdef ENOLINK +-_S(ERR_MAP(ENOLINK), N_("Link has been severed")) ++_S(ENOLINK, N_("Link has been severed")) + #endif + #ifdef ENOMSG +-_S(ERR_MAP(ENOMSG), N_("No message of desired type")) ++_S(ENOMSG, N_("No message of desired type")) + #endif + #ifdef ENOSR +-_S(ERR_MAP(ENOSR), N_("Out of streams resources")) ++_S(ENOSR, N_("Out of streams resources")) + #endif + #ifdef ENOSTR +-_S(ERR_MAP(ENOSTR), N_("Device not a stream")) ++_S(ENOSTR, N_("Device not a stream")) + #endif + #ifdef EOVERFLOW +-_S(ERR_MAP(EOVERFLOW), N_("Value too large for defined data type")) ++_S(EOVERFLOW, N_("Value too large for defined data type")) + #endif + #ifdef EPROTO +-_S(ERR_MAP(EPROTO), N_("Protocol error")) ++_S(EPROTO, N_("Protocol error")) + #endif + #ifdef ETIME +-_S(ERR_MAP(ETIME), N_("Timer expired")) ++_S(ETIME, N_("Timer expired")) + #endif + #ifdef ECANCELED + /* +@@ -543,148 +540,148 @@ TRANS An asynchronous operation was canceled before it + TRANS completed. @xref{Asynchronous I/O}. When you call @code{aio_cancel}, + TRANS the normal result is for the operations affected to complete with this + TRANS error; @pxref{Cancel AIO Operations}. 
*/ +-_S(ERR_MAP(ECANCELED), N_("Operation canceled")) ++_S(ECANCELED, N_("Operation canceled")) + #endif + #ifdef EOWNERDEAD +-_S(ERR_MAP(EOWNERDEAD), N_("Owner died")) ++_S(EOWNERDEAD, N_("Owner died")) + #endif + #ifdef ENOTRECOVERABLE +-_S(ERR_MAP(ENOTRECOVERABLE), N_("State not recoverable")) ++_S(ENOTRECOVERABLE, N_("State not recoverable")) + #endif + #ifdef ERESTART +-_S(ERR_MAP(ERESTART), N_("Interrupted system call should be restarted")) ++_S(ERESTART, N_("Interrupted system call should be restarted")) + #endif + #ifdef ECHRNG +-_S(ERR_MAP(ECHRNG), N_("Channel number out of range")) ++_S(ECHRNG, N_("Channel number out of range")) + #endif + #ifdef EL2NSYNC +-_S(ERR_MAP(EL2NSYNC), N_("Level 2 not synchronized")) ++_S(EL2NSYNC, N_("Level 2 not synchronized")) + #endif + #ifdef EL3HLT +-_S(ERR_MAP(EL3HLT), N_("Level 3 halted")) ++_S(EL3HLT, N_("Level 3 halted")) + #endif + #ifdef EL3RST +-_S(ERR_MAP(EL3RST), N_("Level 3 reset")) ++_S(EL3RST, N_("Level 3 reset")) + #endif + #ifdef ELNRNG +-_S(ERR_MAP(ELNRNG), N_("Link number out of range")) ++_S(ELNRNG, N_("Link number out of range")) + #endif + #ifdef EUNATCH +-_S(ERR_MAP(EUNATCH), N_("Protocol driver not attached")) ++_S(EUNATCH, N_("Protocol driver not attached")) + #endif + #ifdef ENOCSI +-_S(ERR_MAP(ENOCSI), N_("No CSI structure available")) ++_S(ENOCSI, N_("No CSI structure available")) + #endif + #ifdef EL2HLT +-_S(ERR_MAP(EL2HLT), N_("Level 2 halted")) ++_S(EL2HLT, N_("Level 2 halted")) + #endif + #ifdef EBADE +-_S(ERR_MAP(EBADE), N_("Invalid exchange")) ++_S(EBADE, N_("Invalid exchange")) + #endif + #ifdef EBADR +-_S(ERR_MAP(EBADR), N_("Invalid request descriptor")) ++_S(EBADR, N_("Invalid request descriptor")) + #endif + #ifdef EXFULL +-_S(ERR_MAP(EXFULL), N_("Exchange full")) ++_S(EXFULL, N_("Exchange full")) + #endif + #ifdef ENOANO +-_S(ERR_MAP(ENOANO), N_("No anode")) ++_S(ENOANO, N_("No anode")) + #endif + #ifdef EBADRQC +-_S(ERR_MAP(EBADRQC), N_("Invalid request code")) ++_S(EBADRQC, 
N_("Invalid request code")) + #endif + #ifdef EBADSLT +-_S(ERR_MAP(EBADSLT), N_("Invalid slot")) ++_S(EBADSLT, N_("Invalid slot")) + #endif + #ifdef EBFONT +-_S(ERR_MAP(EBFONT), N_("Bad font file format")) ++_S(EBFONT, N_("Bad font file format")) + #endif + #ifdef ENONET +-_S(ERR_MAP(ENONET), N_("Machine is not on the network")) ++_S(ENONET, N_("Machine is not on the network")) + #endif + #ifdef ENOPKG +-_S(ERR_MAP(ENOPKG), N_("Package not installed")) ++_S(ENOPKG, N_("Package not installed")) + #endif + #ifdef EADV +-_S(ERR_MAP(EADV), N_("Advertise error")) ++_S(EADV, N_("Advertise error")) + #endif + #ifdef ESRMNT +-_S(ERR_MAP(ESRMNT), N_("Srmount error")) ++_S(ESRMNT, N_("Srmount error")) + #endif + #ifdef ECOMM +-_S(ERR_MAP(ECOMM), N_("Communication error on send")) ++_S(ECOMM, N_("Communication error on send")) + #endif + #ifdef EDOTDOT +-_S(ERR_MAP(EDOTDOT), N_("RFS specific error")) ++_S(EDOTDOT, N_("RFS specific error")) + #endif + #ifdef ENOTUNIQ +-_S(ERR_MAP(ENOTUNIQ), N_("Name not unique on network")) ++_S(ENOTUNIQ, N_("Name not unique on network")) + #endif + #ifdef EBADFD +-_S(ERR_MAP(EBADFD), N_("File descriptor in bad state")) ++_S(EBADFD, N_("File descriptor in bad state")) + #endif + #ifdef EREMCHG +-_S(ERR_MAP(EREMCHG), N_("Remote address changed")) ++_S(EREMCHG, N_("Remote address changed")) + #endif + #ifdef ELIBACC +-_S(ERR_MAP(ELIBACC), N_("Can not access a needed shared library")) ++_S(ELIBACC, N_("Can not access a needed shared library")) + #endif + #ifdef ELIBBAD +-_S(ERR_MAP(ELIBBAD), N_("Accessing a corrupted shared library")) ++_S(ELIBBAD, N_("Accessing a corrupted shared library")) + #endif + #ifdef ELIBSCN +-_S(ERR_MAP(ELIBSCN), N_(".lib section in a.out corrupted")) ++_S(ELIBSCN, N_(".lib section in a.out corrupted")) + #endif + #ifdef ELIBMAX +-_S(ERR_MAP(ELIBMAX), N_("Attempting to link in too many shared libraries")) ++_S(ELIBMAX, N_("Attempting to link in too many shared libraries")) + #endif + #ifdef ELIBEXEC 
+-_S(ERR_MAP(ELIBEXEC), N_("Cannot exec a shared library directly")) ++_S(ELIBEXEC, N_("Cannot exec a shared library directly")) + #endif + #ifdef ESTRPIPE +-_S(ERR_MAP(ESTRPIPE), N_("Streams pipe error")) ++_S(ESTRPIPE, N_("Streams pipe error")) + #endif + #ifdef EUCLEAN +-_S(ERR_MAP(EUCLEAN), N_("Structure needs cleaning")) ++_S(EUCLEAN, N_("Structure needs cleaning")) + #endif + #ifdef ENOTNAM +-_S(ERR_MAP(ENOTNAM), N_("Not a XENIX named type file")) ++_S(ENOTNAM, N_("Not a XENIX named type file")) + #endif + #ifdef ENAVAIL +-_S(ERR_MAP(ENAVAIL), N_("No XENIX semaphores available")) ++_S(ENAVAIL, N_("No XENIX semaphores available")) + #endif + #ifdef EISNAM +-_S(ERR_MAP(EISNAM), N_("Is a named type file")) ++_S(EISNAM, N_("Is a named type file")) + #endif + #ifdef EREMOTEIO +-_S(ERR_MAP(EREMOTEIO), N_("Remote I/O error")) ++_S(EREMOTEIO, N_("Remote I/O error")) + #endif + #ifdef ENOMEDIUM +-_S(ERR_MAP(ENOMEDIUM), N_("No medium found")) ++_S(ENOMEDIUM, N_("No medium found")) + #endif + #ifdef EMEDIUMTYPE +-_S(ERR_MAP(EMEDIUMTYPE), N_("Wrong medium type")) ++_S(EMEDIUMTYPE, N_("Wrong medium type")) + #endif + #ifdef ENOKEY +-_S(ERR_MAP(ENOKEY), N_("Required key not available")) ++_S(ENOKEY, N_("Required key not available")) + #endif + #ifdef EKEYEXPIRED +-_S(ERR_MAP(EKEYEXPIRED), N_("Key has expired")) ++_S(EKEYEXPIRED, N_("Key has expired")) + #endif + #ifdef EKEYREVOKED +-_S(ERR_MAP(EKEYREVOKED), N_("Key has been revoked")) ++_S(EKEYREVOKED, N_("Key has been revoked")) + #endif + #ifdef EKEYREJECTED +-_S(ERR_MAP(EKEYREJECTED), N_("Key was rejected by service")) ++_S(EKEYREJECTED, N_("Key was rejected by service")) + #endif + #ifdef ERFKILL +-_S(ERR_MAP(ERFKILL), N_("Operation not possible due to RF-kill")) ++_S(ERFKILL, N_("Operation not possible due to RF-kill")) + #endif + #ifdef EHWPOISON +-_S(ERR_MAP(EHWPOISON), N_("Memory page has hardware error")) ++_S(EHWPOISON, N_("Memory page has hardware error")) + #endif + #ifdef EBADRPC +-_S(ERR_MAP(EBADRPC), N_("RPC 
struct is bad")) ++_S(EBADRPC, N_("RPC struct is bad")) + #endif + #ifdef EFTYPE + /* +@@ -693,40 +690,40 @@ TRANS operation, or a data file had the wrong format. + TRANS + TRANS On some systems @code{chmod} returns this error if you try to set the + TRANS sticky bit on a non-directory file; @pxref{Setting Permissions}. */ +-_S(ERR_MAP(EFTYPE), N_("Inappropriate file type or format")) ++_S(EFTYPE, N_("Inappropriate file type or format")) + #endif + #ifdef EPROCUNAVAIL +-_S(ERR_MAP(EPROCUNAVAIL), N_("RPC bad procedure for program")) ++_S(EPROCUNAVAIL, N_("RPC bad procedure for program")) + #endif + #ifdef EAUTH +-_S(ERR_MAP(EAUTH), N_("Authentication error")) ++_S(EAUTH, N_("Authentication error")) + #endif + #ifdef EDIED + /* + TRANS On @gnuhurdsystems{}, opening a file returns this error when the file is + TRANS translated by a program and the translator program dies while starting + TRANS up, before it has connected to the file. */ +-_S(ERR_MAP(EDIED), N_("Translator died")) ++_S(EDIED, N_("Translator died")) + #endif + #ifdef ERPCMISMATCH +-_S(ERR_MAP(ERPCMISMATCH), N_("RPC version wrong")) ++_S(ERPCMISMATCH, N_("RPC version wrong")) + #endif + #ifdef EGREGIOUS + /* + TRANS You did @strong{what}? */ +-_S(ERR_MAP(EGREGIOUS), N_("You really blew it this time")) ++_S(EGREGIOUS, N_("You really blew it this time")) + #endif + #ifdef EPROCLIM + /* + TRANS This means that the per-user limit on new process would be exceeded by + TRANS an attempted @code{fork}. @xref{Limits on Resources}, for details on + TRANS the @code{RLIMIT_NPROC} limit. */ +-_S(ERR_MAP(EPROCLIM), N_("Too many processes")) ++_S(EPROCLIM, N_("Too many processes")) + #endif + #ifdef EGRATUITOUS + /* + TRANS This error code has no purpose. */ +-_S(ERR_MAP(EGRATUITOUS), N_("Gratuitous error")) ++_S(EGRATUITOUS, N_("Gratuitous error")) + #endif + #if defined (ENOTSUP) && ENOTSUP != EOPNOTSUPP + /* +@@ -742,10 +739,10 @@ TRANS values. 
+ TRANS + TRANS If the entire function is not available at all in the implementation, + TRANS it returns @code{ENOSYS} instead. */ +-_S(ERR_MAP(ENOTSUP), N_("Not supported")) ++_S(ENOTSUP, N_("Not supported")) + #endif + #ifdef EPROGMISMATCH +-_S(ERR_MAP(EPROGMISMATCH), N_("RPC program version wrong")) ++_S(EPROGMISMATCH, N_("RPC program version wrong")) + #endif + #ifdef EBACKGROUND + /* +@@ -755,7 +752,7 @@ TRANS foreground process group of the terminal. Users do not usually see this + TRANS error because functions such as @code{read} and @code{write} translate + TRANS it into a @code{SIGTTIN} or @code{SIGTTOU} signal. @xref{Job Control}, + TRANS for information on process groups and these signals. */ +-_S(ERR_MAP(EBACKGROUND), N_("Inappropriate operation for background process")) ++_S(EBACKGROUND, N_("Inappropriate operation for background process")) + #endif + #ifdef EIEIO + /* +@@ -773,7 +770,7 @@ TRANS @c "bought the farm" means "died". -jtobey + TRANS @c + TRANS @c Translators, please do not translate this litteraly, translate it into + TRANS @c an idiomatic funny way of saying that the computer died. */ +-_S(ERR_MAP(EIEIO), N_("Computer bought the farm")) ++_S(EIEIO, N_("Computer bought the farm")) + #endif + #if defined (EWOULDBLOCK) && EWOULDBLOCK != EAGAIN + /* +@@ -782,18 +779,18 @@ TRANS The values are always the same, on every operating system. + TRANS + TRANS C libraries in many older Unix systems have @code{EWOULDBLOCK} as a + TRANS separate error code. */ +-_S(ERR_MAP(EWOULDBLOCK), N_("Operation would block")) ++_S(EWOULDBLOCK, N_("Operation would block")) + #endif + #ifdef ENEEDAUTH +-_S(ERR_MAP(ENEEDAUTH), N_("Need authenticator")) ++_S(ENEEDAUTH, N_("Need authenticator")) + #endif + #ifdef ED + /* + TRANS The experienced user will know what is wrong. + TRANS @c This error code is a joke. Its perror text is part of the joke. + TRANS @c Don't change it. 
*/ +-_S(ERR_MAP(ED), N_("?")) ++_S(ED, N_("?")) + #endif + #ifdef EPROGUNAVAIL +-_S(ERR_MAP(EPROGUNAVAIL), N_("RPC program not available")) ++_S(EPROGUNAVAIL, N_("RPC program not available")) + #endif +diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h +index 0f08079e48..672d8f27ce 100644 +--- a/sysdeps/i386/dl-machine.h ++++ b/sysdeps/i386/dl-machine.h +@@ -338,16 +338,22 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc, + { + # ifndef RTLD_BOOTSTRAP + if (sym_map != map +- && sym_map->l_type != lt_executable + && !sym_map->l_relocated) + { + const char *strtab + = (const char *) D_PTR (map, l_info[DT_STRTAB]); +- _dl_error_printf ("\ ++ if (sym_map->l_type == lt_executable) ++ _dl_fatal_printf ("\ ++%s: IFUNC symbol '%s' referenced in '%s' is defined in the executable \ ++and creates an unsatisfiable circular dependency.\n", ++ RTLD_PROGNAME, strtab + refsym->st_name, ++ map->l_name); ++ else ++ _dl_error_printf ("\ + %s: Relink `%s' with `%s' for IFUNC symbol `%s'\n", +- RTLD_PROGNAME, map->l_name, +- sym_map->l_name, +- strtab + refsym->st_name); ++ RTLD_PROGNAME, map->l_name, ++ sym_map->l_name, ++ strtab + refsym->st_name); + } + # endif + value = ((Elf32_Addr (*) (void)) value) (); +diff --git a/sysdeps/powerpc/powerpc64/backtrace.c b/sysdeps/powerpc/powerpc64/backtrace.c +index 8a53a1088f..362a2b713c 100644 +--- a/sysdeps/powerpc/powerpc64/backtrace.c ++++ b/sysdeps/powerpc/powerpc64/backtrace.c +@@ -54,11 +54,22 @@ struct signal_frame_64 { + /* We don't care about the rest, since the IP value is at 'uc' field. */ + }; + ++/* Test if the address match to the inside the trampoline code. ++ Up to and including kernel 5.8, returning from an interrupt or syscall to a ++ signal handler starts execution directly at the handler's entry point, with ++ LR set to address of the sigreturn trampoline (the vDSO symbol). 
++ Newer kernels will branch to signal handler from the trampoline instead, so ++ checking the stacktrace against the vDSO entrypoint does not work in such ++ case. ++ The vDSO branches with a 'bctrl' instruction, so checking either the ++ vDSO address itself and the next instruction should cover all kernel ++ versions. */ + static inline bool + is_sigtramp_address (void *nip) + { + #ifdef HAVE_SIGTRAMP_RT64 +- if (nip == GLRO (dl_vdso_sigtramp_rt64)) ++ if (nip == GLRO (dl_vdso_sigtramp_rt64) || ++ nip == GLRO (dl_vdso_sigtramp_rt64) + 4) + return true; + #endif + return false; +diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile +index 920d875420..bf9b7f7223 100644 +--- a/sysdeps/pthread/Makefile ++++ b/sysdeps/pthread/Makefile +@@ -107,6 +107,7 @@ tests += tst-cnd-basic tst-mtx-trylock tst-cnd-broadcast \ + tst-unload \ + tst-unwind-thread \ + tst-pt-vfork1 tst-pt-vfork2 tst-vfork1x tst-vfork2x \ ++ tst-pthread-exit-signal \ + + + # Files which must not be linked with libpthread. +diff --git a/sysdeps/pthread/tst-pthread-exit-signal.c b/sysdeps/pthread/tst-pthread-exit-signal.c +new file mode 100644 +index 0000000000..b4526fe663 +--- /dev/null ++++ b/sysdeps/pthread/tst-pthread-exit-signal.c +@@ -0,0 +1,45 @@ ++/* Test that pending signals are not delivered on thread exit (bug 28607). ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Due to bug 28607, pthread_kill (or pthread_cancel) restored the ++ signal mask during during thread exit, triggering the delivery of a ++ blocked pending signal (SIGUSR1 in this test). */ ++ ++#include <support/xthread.h> ++#include <support/xsignal.h> ++ ++static void * ++threadfunc (void *closure) ++{ ++ sigset_t sigmask; ++ sigfillset (&sigmask); ++ xpthread_sigmask (SIG_SETMASK, &sigmask, NULL); ++ xpthread_kill (pthread_self (), SIGUSR1); ++ pthread_exit (NULL); ++ return NULL; ++} ++ ++static int ++do_test (void) ++{ ++ pthread_t thr = xpthread_create (NULL, threadfunc, NULL); ++ xpthread_join (thr); ++ return 0; ++} ++ ++#include <support/test-driver.c> +diff --git a/sysdeps/s390/configure b/sysdeps/s390/configure +index fa46e9e351..e7f576338d 100644 +--- a/sysdeps/s390/configure ++++ b/sysdeps/s390/configure +@@ -123,7 +123,9 @@ void testinsn (char *buf) + __asm__ (".machine \"arch13\" \n\t" + ".machinemode \"zarch_nohighgprs\" \n\t" + "lghi %%r0,16 \n\t" +- "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); ++ "mvcrl 0(%0),32(%0) \n\t" ++ "vstrs %%v20,%%v20,%%v20,%%v20,0,2" ++ : : "a" (buf) : "memory", "r0"); + } + EOF + if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c +@@ -271,7 +273,9 @@ else + void testinsn (char *buf) + { + __asm__ ("lghi %%r0,16 \n\t" +- "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); ++ "mvcrl 0(%0),32(%0) \n\t" ++ "vstrs %%v20,%%v20,%%v20,%%v20,0,2" ++ : : "a" (buf) : "memory", "r0"); + } + EOF + if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c +diff --git a/sysdeps/s390/configure.ac b/sysdeps/s390/configure.ac +index 3ed5a8ef87..5c3479e8cf 100644 +--- a/sysdeps/s390/configure.ac ++++ b/sysdeps/s390/configure.ac +@@ -88,7 +88,9 @@ void testinsn (char *buf) + __asm__ (".machine \"arch13\" \n\t" + ".machinemode 
\"zarch_nohighgprs\" \n\t" + "lghi %%r0,16 \n\t" +- "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); ++ "mvcrl 0(%0),32(%0) \n\t" ++ "vstrs %%v20,%%v20,%%v20,%%v20,0,2" ++ : : "a" (buf) : "memory", "r0"); + } + EOF + dnl test, if assembler supports S390 arch13 instructions +@@ -195,7 +197,9 @@ cat > conftest.c <<\EOF + void testinsn (char *buf) + { + __asm__ ("lghi %%r0,16 \n\t" +- "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); ++ "mvcrl 0(%0),32(%0) \n\t" ++ "vstrs %%v20,%%v20,%%v20,%%v20,0,2" ++ : : "a" (buf) : "memory", "r0"); + } + EOF + dnl test, if assembler supports S390 arch13 zarch instructions as default +diff --git a/sysdeps/s390/memmove.c b/sysdeps/s390/memmove.c +index 5fc85e129f..ee59b5de14 100644 +--- a/sysdeps/s390/memmove.c ++++ b/sysdeps/s390/memmove.c +@@ -43,7 +43,7 @@ extern __typeof (__redirect_memmove) MEMMOVE_ARCH13 attribute_hidden; + s390_libc_ifunc_expr (__redirect_memmove, memmove, + ({ + s390_libc_ifunc_expr_stfle_init (); +- (HAVE_MEMMOVE_ARCH13 ++ (HAVE_MEMMOVE_ARCH13 && (hwcap & HWCAP_S390_VXRS_EXT2) + && S390_IS_ARCH13_MIE3 (stfle_bits)) + ? 
MEMMOVE_ARCH13 + : (HAVE_MEMMOVE_Z13 && (hwcap & HWCAP_S390_VX)) +diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c +index e6195c6e26..17c0cc3952 100644 +--- a/sysdeps/s390/multiarch/ifunc-impl-list.c ++++ b/sysdeps/s390/multiarch/ifunc-impl-list.c +@@ -171,7 +171,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL (i, name, memmove, + # if HAVE_MEMMOVE_ARCH13 + IFUNC_IMPL_ADD (array, i, memmove, +- S390_IS_ARCH13_MIE3 (stfle_bits), ++ ((dl_hwcap & HWCAP_S390_VXRS_EXT2) ++ && S390_IS_ARCH13_MIE3 (stfle_bits)), + MEMMOVE_ARCH13) + # endif + # if HAVE_MEMMOVE_Z13 +diff --git a/sysdeps/sh/be/sh4/fpu/Implies b/sysdeps/sh/be/sh4/fpu/Implies +new file mode 100644 +index 0000000000..71b28ee1a4 +--- /dev/null ++++ b/sysdeps/sh/be/sh4/fpu/Implies +@@ -0,0 +1 @@ ++sh/sh4/fpu +diff --git a/sysdeps/sh/le/sh4/fpu/Implies b/sysdeps/sh/le/sh4/fpu/Implies +new file mode 100644 +index 0000000000..71b28ee1a4 +--- /dev/null ++++ b/sysdeps/sh/le/sh4/fpu/Implies +@@ -0,0 +1 @@ ++sh/sh4/fpu +diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile +index 9b2a253032..34748ffcd1 100644 +--- a/sysdeps/unix/sysv/linux/Makefile ++++ b/sysdeps/unix/sysv/linux/Makefile +@@ -100,7 +100,7 @@ tests += tst-clone tst-clone2 tst-clone3 tst-fanotify tst-personality \ + tst-quota tst-sync_file_range tst-sysconf-iov_max tst-ttyname \ + test-errno-linux tst-memfd_create tst-mlock2 tst-pkey \ + tst-rlimit-infinity tst-ofdlocks tst-gettid tst-gettid-kill \ +- tst-tgkill ++ tst-tgkill tst-sysvsem-linux tst-sysvmsg-linux tst-sysvshm-linux + tests-internal += tst-ofdlocks-compat tst-sigcontext-get_pc + + CFLAGS-tst-sigcontext-get_pc.c = -fasynchronous-unwind-tables +diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h +index fc688450ee..00a4d0c8e7 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h ++++ 
b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h +@@ -54,6 +54,10 @@ + && MIDR_PARTNUM(midr) == 0x000) + #define IS_NEOVERSE_N1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ + && MIDR_PARTNUM(midr) == 0xd0c) ++#define IS_NEOVERSE_N2(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ ++ && MIDR_PARTNUM(midr) == 0xd49) ++#define IS_NEOVERSE_V1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ ++ && MIDR_PARTNUM(midr) == 0xd40) + + #define IS_EMAG(midr) (MIDR_IMPLEMENTOR(midr) == 'P' \ + && MIDR_PARTNUM(midr) == 0x000) +diff --git a/sysdeps/unix/sysv/linux/mq_notify.c b/sysdeps/unix/sysv/linux/mq_notify.c +index 61bbb03b64..2bb98172c8 100644 +--- a/sysdeps/unix/sysv/linux/mq_notify.c ++++ b/sysdeps/unix/sysv/linux/mq_notify.c +@@ -132,9 +132,12 @@ helper_thread (void *arg) + to wait until it is done with it. */ + (void) __pthread_barrier_wait (&notify_barrier); + } +- else if (data.raw[NOTIFY_COOKIE_LEN - 1] == NOTIFY_REMOVED) +- /* The only state we keep is the copy of the thread attributes. */ +- free (data.attr); ++ else if (data.raw[NOTIFY_COOKIE_LEN - 1] == NOTIFY_REMOVED && data.attr != NULL) ++ { ++ /* The only state we keep is the copy of the thread attributes. */ ++ pthread_attr_destroy (data.attr); ++ free (data.attr); ++ } + } + return NULL; + } +@@ -255,8 +258,14 @@ mq_notify (mqd_t mqdes, const struct sigevent *notification) + if (data.attr == NULL) + return -1; + +- memcpy (data.attr, notification->sigev_notify_attributes, +- sizeof (pthread_attr_t)); ++ int ret = __pthread_attr_copy (data.attr, ++ notification->sigev_notify_attributes); ++ if (ret != 0) ++ { ++ free (data.attr); ++ __set_errno (ret); ++ return -1; ++ } + } + + /* Construct the new request. */ +@@ -269,8 +278,11 @@ mq_notify (mqd_t mqdes, const struct sigevent *notification) + int retval = INLINE_SYSCALL (mq_notify, 2, mqdes, &se); + + /* If it failed, free the allocated memory. 
*/ +- if (__glibc_unlikely (retval != 0)) +- free (data.attr); ++ if (retval != 0 && data.attr != NULL) ++ { ++ pthread_attr_destroy (data.attr); ++ free (data.attr); ++ } + + return retval; + } +diff --git a/sysdeps/unix/sysv/linux/msgctl.c b/sysdeps/unix/sysv/linux/msgctl.c +index 0776472d5e..a1f24ab242 100644 +--- a/sysdeps/unix/sysv/linux/msgctl.c ++++ b/sysdeps/unix/sysv/linux/msgctl.c +@@ -90,8 +90,15 @@ __msgctl64 (int msqid, int cmd, struct __msqid64_ds *buf) + struct kernel_msqid64_ds ksemid, *arg = NULL; + if (buf != NULL) + { +- msqid64_to_kmsqid64 (buf, &ksemid); +- arg = &ksemid; ++ /* This is a Linux extension where kernel returns a 'struct msginfo' ++ instead. */ ++ if (cmd == IPC_INFO || cmd == MSG_INFO) ++ arg = (struct kernel_msqid64_ds *) buf; ++ else ++ { ++ msqid64_to_kmsqid64 (buf, &ksemid); ++ arg = &ksemid; ++ } + } + # ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T + if (cmd == IPC_SET) +@@ -169,8 +176,15 @@ __msgctl (int msqid, int cmd, struct msqid_ds *buf) + struct __msqid64_ds msqid64, *buf64 = NULL; + if (buf != NULL) + { +- msqid_to_msqid64 (&msqid64, buf); +- buf64 = &msqid64; ++ /* This is a Linux extension where kernel returns a 'struct msginfo' ++ instead. */ ++ if (cmd == IPC_INFO || cmd == MSG_INFO) ++ buf64 = (struct __msqid64_ds *) buf; ++ else ++ { ++ msqid_to_msqid64 (&msqid64, buf); ++ buf64 = &msqid64; ++ } + } + + int ret = __msgctl64 (msqid, cmd, buf64); +diff --git a/sysdeps/unix/sysv/linux/semctl.c b/sysdeps/unix/sysv/linux/semctl.c +index f131a26fc7..1cdabde8f2 100644 +--- a/sysdeps/unix/sysv/linux/semctl.c ++++ b/sysdeps/unix/sysv/linux/semctl.c +@@ -102,6 +102,7 @@ semun64_to_ksemun64 (int cmd, union semun64 semun64, + r.array = semun64.array; + break; + case SEM_STAT: ++ case SEM_STAT_ANY: + case IPC_STAT: + case IPC_SET: + r.buf = buf; +@@ -150,6 +151,7 @@ __semctl64 (int semid, int semnum, int cmd, ...) 
+ case IPC_STAT: /* arg.buf */ + case IPC_SET: + case SEM_STAT: ++ case SEM_STAT_ANY: + case IPC_INFO: /* arg.__buf */ + case SEM_INFO: + va_start (ap, cmd); +@@ -238,6 +240,7 @@ semun_to_semun64 (int cmd, union semun semun, struct __semid64_ds *semid64) + r.array = semun.array; + break; + case SEM_STAT: ++ case SEM_STAT_ANY: + case IPC_STAT: + case IPC_SET: + r.buf = semid64; +@@ -267,6 +270,7 @@ __semctl (int semid, int semnum, int cmd, ...) + case IPC_STAT: /* arg.buf */ + case IPC_SET: + case SEM_STAT: ++ case SEM_STAT_ANY: + case IPC_INFO: /* arg.__buf */ + case SEM_INFO: + va_start (ap, cmd); +@@ -321,6 +325,7 @@ __semctl_mode16 (int semid, int semnum, int cmd, ...) + case IPC_STAT: /* arg.buf */ + case IPC_SET: + case SEM_STAT: ++ case SEM_STAT_ANY: + case IPC_INFO: /* arg.__buf */ + case SEM_INFO: + va_start (ap, cmd); +@@ -354,6 +359,7 @@ __old_semctl (int semid, int semnum, int cmd, ...) + case IPC_STAT: /* arg.buf */ + case IPC_SET: + case SEM_STAT: ++ case SEM_STAT_ANY: + case IPC_INFO: /* arg.__buf */ + case SEM_INFO: + va_start (ap, cmd); +diff --git a/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies b/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies +new file mode 100644 +index 0000000000..7eeaf15a5a +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies +@@ -0,0 +1 @@ ++unix/sysv/linux/sh/sh4/fpu +diff --git a/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies b/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies +new file mode 100644 +index 0000000000..7eeaf15a5a +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies +@@ -0,0 +1 @@ ++unix/sysv/linux/sh/sh4/fpu +diff --git a/sysdeps/unix/sysv/linux/shmctl.c b/sysdeps/unix/sysv/linux/shmctl.c +index 76d88441f1..1d19a798b1 100644 +--- a/sysdeps/unix/sysv/linux/shmctl.c ++++ b/sysdeps/unix/sysv/linux/shmctl.c +@@ -90,8 +90,15 @@ __shmctl64 (int shmid, int cmd, struct __shmid64_ds *buf) + struct kernel_shmid64_ds kshmid, *arg = NULL; + if (buf != NULL) + { +- shmid64_to_kshmid64 (buf, &kshmid); 
+- arg = &kshmid; ++ /* This is a Linux extension where kernel expects either a ++ 'struct shminfo' (IPC_INFO) or 'struct shm_info' (SHM_INFO). */ ++ if (cmd == IPC_INFO || cmd == SHM_INFO) ++ arg = (struct kernel_shmid64_ds *) buf; ++ else ++ { ++ shmid64_to_kshmid64 (buf, &kshmid); ++ arg = &kshmid; ++ } + } + # ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T + if (cmd == IPC_SET) +@@ -107,7 +114,6 @@ __shmctl64 (int shmid, int cmd, struct __shmid64_ds *buf) + + switch (cmd) + { +- case IPC_INFO: + case IPC_STAT: + case SHM_STAT: + case SHM_STAT_ANY: +@@ -168,8 +174,15 @@ __shmctl (int shmid, int cmd, struct shmid_ds *buf) + struct __shmid64_ds shmid64, *buf64 = NULL; + if (buf != NULL) + { +- shmid_to_shmid64 (&shmid64, buf); +- buf64 = &shmid64; ++ /* This is a Linux extension where kernel expects either a ++ 'struct shminfo' (IPC_INFO) or 'struct shm_info' (SHM_INFO). */ ++ if (cmd == IPC_INFO || cmd == SHM_INFO) ++ buf64 = (struct __shmid64_ds *) buf; ++ else ++ { ++ shmid_to_shmid64 (&shmid64, buf); ++ buf64 = &shmid64; ++ } + } + + int ret = __shmctl64 (shmid, cmd, buf64); +@@ -178,7 +191,6 @@ __shmctl (int shmid, int cmd, struct shmid_ds *buf) + + switch (cmd) + { +- case IPC_INFO: + case IPC_STAT: + case SHM_STAT: + case SHM_STAT_ANY: +diff --git a/sysdeps/unix/sysv/linux/tst-sysvmsg-linux.c b/sysdeps/unix/sysv/linux/tst-sysvmsg-linux.c +new file mode 100644 +index 0000000000..630f4f792c +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/tst-sysvmsg-linux.c +@@ -0,0 +1,177 @@ ++/* Basic tests for Linux SYSV message queue extensions. ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sys/ipc.h> ++#include <sys/msg.h> ++#include <errno.h> ++#include <stdlib.h> ++#include <stdbool.h> ++#include <stdio.h> ++ ++#include <support/check.h> ++#include <support/temp_file.h> ++ ++#define MSGQ_MODE 0644 ++ ++/* These are for the temporary file we generate. */ ++static char *name; ++static int msqid; ++ ++static void ++remove_msq (void) ++{ ++ /* Enforce message queue removal in case of early test failure. ++ Ignore error since the msg may already have being removed. */ ++ msgctl (msqid, IPC_RMID, NULL); ++} ++ ++static void ++do_prepare (int argc, char *argv[]) ++{ ++ TEST_VERIFY_EXIT (create_temp_file ("tst-sysvmsg.", &name) != -1); ++} ++ ++#define PREPARE do_prepare ++ ++struct test_msginfo ++{ ++ int msgmax; ++ int msgmnb; ++ int msgmni; ++}; ++ ++/* It tries to obtain some system-wide SysV messsage queue information from ++ /proc to check against IPC_INFO/MSG_INFO. The /proc only returns the ++ tunables value of MSGMAX, MSGMNB, and MSGMNI. ++ ++ The kernel also returns constant value for MSGSSZ, MSGSEG and also MSGMAP, ++ MSGPOOL, and MSGTQL (for IPC_INFO). The issue to check them is they might ++ change over kernel releases. 
*/ ++ ++static int ++read_proc_file (const char *file) ++{ ++ FILE *f = fopen (file, "r"); ++ if (f == NULL) ++ FAIL_UNSUPPORTED ("/proc is not mounted or %s is not available", file); ++ ++ int v; ++ int r = fscanf (f, "%d", & v); ++ TEST_VERIFY_EXIT (r == 1); ++ ++ fclose (f); ++ return v; ++} ++ ++ ++/* Check if the message queue with IDX (index into the kernel's internal ++ array) matches the one with KEY. The CMD is either MSG_STAT or ++ MSG_STAT_ANY. */ ++ ++static bool ++check_msginfo (int idx, key_t key, int cmd) ++{ ++ struct msqid_ds msginfo; ++ int mid = msgctl (idx, cmd, &msginfo); ++ /* Ignore unused array slot returned by the kernel or information from ++ unknown message queue. */ ++ if ((mid == -1 && errno == EINVAL) || mid != msqid) ++ return false; ++ ++ if (mid == -1) ++ FAIL_EXIT1 ("msgctl with %s failed: %m", ++ cmd == MSG_STAT ? "MSG_STAT" : "MSG_STAT_ANY"); ++ ++ TEST_COMPARE (msginfo.msg_perm.__key, key); ++ TEST_COMPARE (msginfo.msg_perm.mode, MSGQ_MODE); ++ TEST_COMPARE (msginfo.msg_qnum, 0); ++ ++ return true; ++} ++ ++static int ++do_test (void) ++{ ++ atexit (remove_msq); ++ ++ key_t key = ftok (name, 'G'); ++ if (key == -1) ++ FAIL_EXIT1 ("ftok failed: %m"); ++ ++ msqid = msgget (key, MSGQ_MODE | IPC_CREAT); ++ if (msqid == -1) ++ FAIL_EXIT1 ("msgget failed: %m"); ++ ++ struct test_msginfo tipcinfo; ++ tipcinfo.msgmax = read_proc_file ("/proc/sys/kernel/msgmax"); ++ tipcinfo.msgmnb = read_proc_file ("/proc/sys/kernel/msgmnb"); ++ tipcinfo.msgmni = read_proc_file ("/proc/sys/kernel/msgmni"); ++ ++ int msqidx; ++ ++ { ++ struct msginfo ipcinfo; ++ msqidx = msgctl (msqid, IPC_INFO, (struct msqid_ds *) &ipcinfo); ++ if (msqidx == -1) ++ FAIL_EXIT1 ("msgctl with IPC_INFO failed: %m"); ++ ++ TEST_COMPARE (ipcinfo.msgmax, tipcinfo.msgmax); ++ TEST_COMPARE (ipcinfo.msgmnb, tipcinfo.msgmnb); ++ TEST_COMPARE (ipcinfo.msgmni, tipcinfo.msgmni); ++ } ++ ++ /* Same as before but with MSG_INFO. 
*/ ++ { ++ struct msginfo ipcinfo; ++ msqidx = msgctl (msqid, MSG_INFO, (struct msqid_ds *) &ipcinfo); ++ if (msqidx == -1) ++ FAIL_EXIT1 ("msgctl with IPC_INFO failed: %m"); ++ ++ TEST_COMPARE (ipcinfo.msgmax, tipcinfo.msgmax); ++ TEST_COMPARE (ipcinfo.msgmnb, tipcinfo.msgmnb); ++ TEST_COMPARE (ipcinfo.msgmni, tipcinfo.msgmni); ++ } ++ ++ /* We check if the created message queue shows in global list. */ ++ bool found = false; ++ for (int i = 0; i <= msqidx; i++) ++ { ++ /* We can't tell apart if MSG_STAT_ANY is not supported (kernel older ++ than 4.17) or if the index used is invalid. So it just check if the ++ value returned from a valid call matches the created message ++ queue. */ ++ check_msginfo (i, key, MSG_STAT_ANY); ++ ++ if (check_msginfo (i, key, MSG_STAT)) ++ { ++ found = true; ++ break; ++ } ++ } ++ ++ if (!found) ++ FAIL_EXIT1 ("msgctl with MSG_STAT/MSG_STAT_ANY could not find the " ++ "created message queue"); ++ ++ if (msgctl (msqid, IPC_RMID, NULL) == -1) ++ FAIL_EXIT1 ("msgctl failed"); ++ ++ return 0; ++} ++ ++#include <support/test-driver.c> +diff --git a/sysdeps/unix/sysv/linux/tst-sysvsem-linux.c b/sysdeps/unix/sysv/linux/tst-sysvsem-linux.c +new file mode 100644 +index 0000000000..45f19e2d37 +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/tst-sysvsem-linux.c +@@ -0,0 +1,184 @@ ++/* Basic tests for Linux SYSV semaphore extensions. ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sys/ipc.h> ++#include <sys/sem.h> ++#include <errno.h> ++#include <stdlib.h> ++#include <stdbool.h> ++#include <stdio.h> ++ ++#include <support/check.h> ++#include <support/temp_file.h> ++ ++/* These are for the temporary file we generate. */ ++static char *name; ++static int semid; ++ ++static void ++remove_sem (void) ++{ ++ /* Enforce message queue removal in case of early test failure. ++ Ignore error since the sem may already have being removed. */ ++ semctl (semid, 0, IPC_RMID, 0); ++} ++ ++static void ++do_prepare (int argc, char *argv[]) ++{ ++ TEST_VERIFY_EXIT (create_temp_file ("tst-sysvsem.", &name) != -1); ++} ++ ++#define PREPARE do_prepare ++ ++#define SEM_MODE 0644 ++ ++union semun ++{ ++ int val; ++ struct semid_ds *buf; ++ unsigned short *array; ++ struct seminfo *__buf; ++}; ++ ++struct test_seminfo ++{ ++ int semmsl; ++ int semmns; ++ int semopm; ++ int semmni; ++}; ++ ++/* It tries to obtain some system-wide SysV semaphore information from /proc ++ to check against IPC_INFO/SEM_INFO. The /proc only returns the tunables ++ value of SEMMSL, SEMMNS, SEMOPM, and SEMMNI. ++ ++ The kernel also returns constant value for SEMVMX, SEMMNU, SEMMAP, SEMUME, ++ and also SEMUSZ and SEMAEM (for IPC_INFO). The issue to check them is they ++ might change over kernel releases. */ ++ ++static void ++read_sem_stat (struct test_seminfo *tseminfo) ++{ ++ FILE *f = fopen ("/proc/sys/kernel/sem", "r"); ++ if (f == NULL) ++ FAIL_UNSUPPORTED ("/proc is not mounted or /proc/sys/kernel/sem is not " ++ "available"); ++ ++ int r = fscanf (f, "%d %d %d %d", ++ &tseminfo->semmsl, &tseminfo->semmns, &tseminfo->semopm, ++ &tseminfo->semmni); ++ TEST_VERIFY_EXIT (r == 4); ++ ++ fclose (f); ++} ++ ++ ++/* Check if the semaphore with IDX (index into the kernel's internal array) ++ matches the one with KEY. 
The CMD is either SEM_STAT or SEM_STAT_ANY. */ ++ ++static bool ++check_seminfo (int idx, key_t key, int cmd) ++{ ++ struct semid_ds seminfo; ++ int sid = semctl (idx, 0, cmd, (union semun) { .buf = &seminfo }); ++ /* Ignore unused array slot returned by the kernel or information from ++ unknown semaphores. */ ++ if ((sid == -1 && errno == EINVAL) || sid != semid) ++ return false; ++ ++ if (sid == -1) ++ FAIL_EXIT1 ("semctl with SEM_STAT failed (errno=%d)", errno); ++ ++ TEST_COMPARE (seminfo.sem_perm.__key, key); ++ TEST_COMPARE (seminfo.sem_perm.mode, SEM_MODE); ++ TEST_COMPARE (seminfo.sem_nsems, 1); ++ ++ return true; ++} ++ ++static int ++do_test (void) ++{ ++ atexit (remove_sem); ++ ++ key_t key = ftok (name, 'G'); ++ if (key == -1) ++ FAIL_EXIT1 ("ftok failed: %m"); ++ ++ semid = semget (key, 1, IPC_CREAT | IPC_EXCL | SEM_MODE); ++ if (semid == -1) ++ FAIL_EXIT1 ("semget failed: %m"); ++ ++ struct test_seminfo tipcinfo; ++ read_sem_stat (&tipcinfo); ++ ++ int semidx; ++ ++ { ++ struct seminfo ipcinfo; ++ semidx = semctl (semid, 0, IPC_INFO, (union semun) { .__buf = &ipcinfo }); ++ if (semidx == -1) ++ FAIL_EXIT1 ("semctl with IPC_INFO failed: %m"); ++ ++ TEST_COMPARE (ipcinfo.semmsl, tipcinfo.semmsl); ++ TEST_COMPARE (ipcinfo.semmns, tipcinfo.semmns); ++ TEST_COMPARE (ipcinfo.semopm, tipcinfo.semopm); ++ TEST_COMPARE (ipcinfo.semmni, tipcinfo.semmni); ++ } ++ ++ /* Same as before but with SEM_INFO. */ ++ { ++ struct seminfo ipcinfo; ++ semidx = semctl (semid, 0, SEM_INFO, (union semun) { .__buf = &ipcinfo }); ++ if (semidx == -1) ++ FAIL_EXIT1 ("semctl with IPC_INFO failed: %m"); ++ ++ TEST_COMPARE (ipcinfo.semmsl, tipcinfo.semmsl); ++ TEST_COMPARE (ipcinfo.semmns, tipcinfo.semmns); ++ TEST_COMPARE (ipcinfo.semopm, tipcinfo.semopm); ++ TEST_COMPARE (ipcinfo.semmni, tipcinfo.semmni); ++ } ++ ++ /* We check if the created semaphore shows in the system-wide status. 
*/ ++ bool found = false; ++ for (int i = 0; i <= semidx; i++) ++ { ++ /* We can't tell whether SEM_STAT_ANY is not supported (kernel older ++ than 4.17) or the index used is invalid. So it just checks if the ++ value returned from a valid call matches the created semaphore. */ ++ check_seminfo (i, key, SEM_STAT_ANY); ++ ++ if (check_seminfo (i, key, SEM_STAT)) ++ { ++ found = true; ++ break; ++ } ++ } ++ ++ if (!found) ++ FAIL_EXIT1 ("semctl with SEM_STAT/SEM_STAT_ANY could not find the " ++ "created semaphore"); ++ ++ if (semctl (semid, 0, IPC_RMID, 0) == -1) ++ FAIL_EXIT1 ("semctl failed: %m"); ++ ++ return 0; ++} ++ ++#include <support/test-driver.c> +diff --git a/sysdeps/unix/sysv/linux/tst-sysvshm-linux.c b/sysdeps/unix/sysv/linux/tst-sysvshm-linux.c +new file mode 100644 +index 0000000000..bb154592a6 +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/tst-sysvshm-linux.c +@@ -0,0 +1,188 @@ ++/* Basic tests for Linux SYSV shared memory extensions. ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>.
*/ ++ ++#include <sys/ipc.h> ++#include <sys/shm.h> ++#include <errno.h> ++#include <stdlib.h> ++#include <stdbool.h> ++#include <stdio.h> ++#include <unistd.h> ++#include <inttypes.h> ++#include <limits.h> ++ ++#include <support/check.h> ++#include <support/temp_file.h> ++ ++#define SHM_MODE 0644 ++ ++/* These are for the temporary file we generate. */ ++static char *name; ++static int shmid; ++static long int pgsz; ++ ++static void ++remove_shm (void) ++{ ++ /* Enforce shared memory removal in case of early test failure. ++ Ignore error since the shm may already have been removed. */ ++ shmctl (shmid, IPC_RMID, NULL); ++} ++ ++static void ++do_prepare (int argc, char *argv[]) ++{ ++ TEST_VERIFY_EXIT (create_temp_file ("tst-sysvshm.", &name) != -1); ++} ++ ++#define PREPARE do_prepare ++ ++struct test_shminfo ++{ ++ __syscall_ulong_t shmall; ++ __syscall_ulong_t shmmax; ++ __syscall_ulong_t shmmni; ++}; ++ ++/* It tries to obtain some system-wide SysV shared memory information from ++ /proc to check against IPC_INFO/SHM_INFO. /proc only returns the ++ tunable values of SHMALL, SHMMAX, and SHMMNI. */ ++ ++static uint64_t ++read_proc_file (const char *file) ++{ ++ FILE *f = fopen (file, "r"); ++ if (f == NULL) ++ FAIL_UNSUPPORTED ("/proc is not mounted or %s is not available", file); ++ ++ /* Handle 32-bit binaries running on 64-bit kernels. */ ++ uint64_t v; ++ int r = fscanf (f, "%" SCNu64, &v); ++ TEST_VERIFY_EXIT (r == 1); ++ ++ fclose (f); ++ return v; ++} ++ ++ ++/* Check if the shared memory segment with IDX (index into the kernel's ++ internal array) matches the one with KEY. The CMD is either SHM_STAT or ++ SHM_STAT_ANY. */ ++ ++static bool ++check_shminfo (int idx, key_t key, int cmd) ++{ ++ struct shmid_ds shminfo; ++ int sid = shmctl (idx, cmd, &shminfo); ++ /* Ignore unused array slot returned by the kernel or information from ++ unknown shared memory segments.
*/ ++ if ((sid == -1 && errno == EINVAL) || sid != shmid) ++ return false; ++ ++ if (sid == -1) ++ FAIL_EXIT1 ("shmctl with %s failed: %m", ++ cmd == SHM_STAT ? "SHM_STAT" : "SHM_STAT_ANY"); ++ ++ TEST_COMPARE (shminfo.shm_perm.__key, key); ++ TEST_COMPARE (shminfo.shm_perm.mode, SHM_MODE); ++ TEST_COMPARE (shminfo.shm_segsz, pgsz); ++ ++ return true; ++} ++ ++static int ++do_test (void) ++{ ++ atexit (remove_shm); ++ ++ pgsz = sysconf (_SC_PAGESIZE); ++ if (pgsz == -1) ++ FAIL_EXIT1 ("sysconf (_SC_PAGESIZE) failed: %m"); ++ ++ key_t key = ftok (name, 'G'); ++ if (key == -1) ++ FAIL_EXIT1 ("ftok failed: %m"); ++ ++ shmid = shmget (key, pgsz, IPC_CREAT | IPC_EXCL | SHM_MODE); ++ if (shmid == -1) ++ FAIL_EXIT1 ("shmget failed: %m"); ++ ++ /* It does not check shmmax because the kernel clamps its value to INT_MAX for: ++ ++ 1. Compat symbols with IPC_64, i.e., 32-bit binaries running on 64-bit ++ kernels. ++ ++ 2. Default symbol without IPC_64 (defined as IPC_OLD within Linux) and ++ glibc always uses IPC_64 for 32-bit ABIs (to support 64-bit time_t). ++ It means that 32-bit binaries running on 32-bit kernels will not see ++ shmmax being clamped. ++ ++ And finding out whether the compat symbol is used would require checking ++ the underlying kernel against the current ABI. The shmall and shmmni ++ checks already provide enough coverage. */ ++ ++ struct test_shminfo tipcinfo; ++ tipcinfo.shmall = read_proc_file ("/proc/sys/kernel/shmall"); ++ tipcinfo.shmmni = read_proc_file ("/proc/sys/kernel/shmmni"); ++ ++ int shmidx; ++ ++ /* Note: SHM_INFO does not return a shminfo, but rather a 'struct shm_info'. ++ It is tricky to verify its values since the syscall returns system-wide ++ resources consumed by shared memory. The shmctl implementation handles ++ SHM_INFO as IPC_INFO, so the IPC_INFO test should validate SHM_INFO as ++ well.
*/ ++ ++ { ++ struct shminfo ipcinfo; ++ shmidx = shmctl (shmid, IPC_INFO, (struct shmid_ds *) &ipcinfo); ++ if (shmidx == -1) ++ FAIL_EXIT1 ("shmctl with IPC_INFO failed: %m"); ++ ++ TEST_COMPARE (ipcinfo.shmall, tipcinfo.shmall); ++ TEST_COMPARE (ipcinfo.shmmni, tipcinfo.shmmni); ++ } ++ ++ /* We check if the created shared memory shows in the global list. */ ++ bool found = false; ++ for (int i = 0; i <= shmidx; i++) ++ { ++ /* We can't tell whether SHM_STAT_ANY is not supported (kernel older ++ than 4.17) or the index used is invalid. So it just checks if the ++ value returned from a valid call matches the created shared ++ memory segment. */ ++ check_shminfo (i, key, SHM_STAT_ANY); ++ ++ if (check_shminfo (i, key, SHM_STAT)) ++ { ++ found = true; ++ break; ++ } ++ } ++ ++ if (!found) ++ FAIL_EXIT1 ("shmctl with SHM_STAT/SHM_STAT_ANY could not find the " ++ "created shared memory"); ++ ++ if (shmctl (shmid, IPC_RMID, NULL) == -1) ++ FAIL_EXIT1 ("shmctl failed"); ++ ++ return 0; ++} ++ ++#include <support/test-driver.c> +diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile +index a6736aef25..e821d95fa3 100644 +--- a/sysdeps/x86/Makefile ++++ b/sysdeps/x86/Makefile +@@ -12,10 +12,39 @@ endif + ifeq ($(subdir),setjmp) + gen-as-const-headers += jmp_buf-ssp.sym + sysdep_routines += __longjmp_cancel ++ifneq ($(enable-cet),no) ++ifneq ($(have-tunables),no) ++tests += tst-setjmp-cet ++tst-setjmp-cet-ENV = GLIBC_TUNABLES=glibc.cpu.x86_ibt=on:glibc.cpu.x86_shstk=on ++endif ++endif + endif + + ifeq ($(subdir),string) + sysdep_routines += cacheinfo ++ ++tests += \ ++ tst-memchr-rtm \ ++ tst-memcmp-rtm \ ++ tst-memmove-rtm \ ++ tst-memrchr-rtm \ ++ tst-memset-rtm \ ++ tst-strchr-rtm \ ++ tst-strcpy-rtm \ ++ tst-strlen-rtm \ ++ tst-strncmp-rtm \ ++ tst-strrchr-rtm ++ ++CFLAGS-tst-memchr-rtm.c += -mrtm ++CFLAGS-tst-memcmp-rtm.c += -mrtm ++CFLAGS-tst-memmove-rtm.c += -mrtm ++CFLAGS-tst-memrchr-rtm.c += -mrtm ++CFLAGS-tst-memset-rtm.c += -mrtm ++CFLAGS-tst-strchr-rtm.c += -mrtm
++CFLAGS-tst-strcpy-rtm.c += -mrtm ++CFLAGS-tst-strlen-rtm.c += -mrtm ++CFLAGS-tst-strncmp-rtm.c += -mrtm ++CFLAGS-tst-strrchr-rtm.c += -mrtm + endif + + ifneq ($(enable-cet),no) +diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c +index 217c21c34f..3fb4a028d8 100644 +--- a/sysdeps/x86/cacheinfo.c ++++ b/sysdeps/x86/cacheinfo.c +@@ -808,7 +808,7 @@ init_cacheinfo (void) + threads = 1 << ((ecx >> 12) & 0x0f); + } + +- if (threads == 0) ++ if (threads == 0 || cpu_features->basic.family >= 0x17) + { + /* If APIC ID width is not available, use logical + processor count. */ +@@ -823,8 +823,22 @@ init_cacheinfo (void) + if (threads > 0) + shared /= threads; + +- /* Account for exclusive L2 and L3 caches. */ +- shared += core; ++ /* Get the shared cache size per CCX for Zen architectures. */ ++ if (cpu_features->basic.family >= 0x17) ++ { ++ unsigned int eax; ++ ++ /* Get the number of threads sharing the L3 cache in a CCX. */ ++ __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx); ++ ++ unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1; ++ shared *= threads_per_ccx; ++ } ++ else ++ { ++ /* Account for exclusive L2 and L3 caches. */ ++ shared += core; ++ } + } + } + +@@ -854,14 +868,20 @@ init_cacheinfo (void) + __x86_shared_cache_size = shared; + } + +- /* The large memcpy micro benchmark in glibc shows that 6 times of +- shared cache size is the approximate value above which non-temporal +- store becomes faster on a 8-core processor. This is the 3/4 of the +- total shared cache size. */ ++ /* The default setting for the non_temporal threshold is 3/4 of one ++ thread's share of the chip's cache. For most Intel and AMD processors ++ with an initial release date between 2017 and 2020, a thread's typical ++ share of the cache is from 500 KBytes to 2 MBytes.
Using the 3/4 ++ threshold leaves 125 KBytes to 500 KBytes of the thread's data ++ in cache after a maximum temporal copy, which will maintain ++ in cache a reasonable portion of the thread's stack and other ++ active data. If the threshold is set higher than one thread's ++ share of the cache, it has a substantial risk of negatively ++ impacting the performance of other threads running on the chip. */ + __x86_shared_non_temporal_threshold + = (cpu_features->non_temporal_threshold != 0 + ? cpu_features->non_temporal_threshold +- : __x86_shared_cache_size * threads * 3 / 4); ++ : __x86_shared_cache_size * 3 / 4); + + /* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8. */ + unsigned int minimum_rep_movsb_threshold; +diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c +index 4c24ba7c31..484efe7a0f 100644 +--- a/sysdeps/x86/cpu-features.c ++++ b/sysdeps/x86/cpu-features.c +@@ -71,7 +71,6 @@ update_usable (struct cpu_features *cpu_features) + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_6); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_7); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_9); +- CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_11); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_12); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_13); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_17); +@@ -318,6 +317,9 @@ update_usable (struct cpu_features *cpu_features) + /* Determine if PKU is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, OSPKE)) + CPU_FEATURE_SET (cpu_features, PKU); ++ ++ if (CPU_FEATURES_CPU_P (cpu_features, RTM_ALWAYS_ABORT)) ++ CPU_FEATURE_UNSET (cpu_features, RTM); + } + + static void +@@ -516,11 +518,39 @@ init_cpu_features (struct cpu_features *cpu_features) + break; + } + +- /* Disable TSX on some Haswell processors to avoid TSX on kernels that +- weren't updated with the latest microcode package (which disables +- broken feature by default). 
*/ ++ /* Disable TSX on some processors to avoid TSX on kernels that ++ weren't updated with the latest microcode package (which ++ disables broken feature by default). */ + switch (model) + { ++ case 0x55: ++ if (stepping <= 5) ++ goto disable_tsx; ++ break; ++ case 0x8e: ++ /* NB: Although the errata documents that for model == 0x8e, ++ only 0xb stepping or lower are impacted, the intention of ++ the errata was to disable TSX on all client processors on ++ all steppings. Include 0xc stepping which is an Intel ++ Core i7-8665U, a client mobile processor. */ ++ case 0x9e: ++ if (stepping > 0xc) ++ break; ++ /* Fall through. */ ++ case 0x4e: ++ case 0x5e: ++ { ++ /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for ++ processors listed in: ++ ++https://www.intel.com/content/www/us/en/support/articles/000059422/processor... ++ */ ++disable_tsx: ++ CPU_FEATURE_UNSET (cpu_features, HLE); ++ CPU_FEATURE_UNSET (cpu_features, RTM); ++ CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT); ++ } ++ break; + case 0x3f: + /* Xeon E7 v3 with stepping >= 4 has working TSX. */ + if (stepping >= 4) +@@ -546,8 +576,24 @@ init_cpu_features (struct cpu_features *cpu_features) + cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER] + |= bit_arch_Prefer_No_VZEROUPPER; + else +- cpu_features->preferred[index_arch_Prefer_No_AVX512] +- |= bit_arch_Prefer_No_AVX512; ++ { ++ cpu_features->preferred[index_arch_Prefer_No_AVX512] ++ |= bit_arch_Prefer_No_AVX512; ++ ++ /* Avoid RTM abort triggered by VZEROUPPER inside a ++ transactionally executing RTM region. */ ++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) ++ cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER] ++ |= bit_arch_Prefer_No_VZEROUPPER; ++ ++ /* Since to compare 2 32-byte strings, 256-bit EVEX strcmp ++ requires 2 loads, 3 VPCMPs and 2 KORDs while AVX2 strcmp ++ requires 1 load, 2 VPCMPEQs, 1 VPMINU and 1 VPMOVMSKB, ++ AVX2 strcmp is faster than EVEX strcmp. 
*/ ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)) ++ cpu_features->preferred[index_arch_Prefer_AVX2_STRCMP] ++ |= bit_arch_Prefer_AVX2_STRCMP; ++ } + } + /* This spells out "AuthenticAMD" or "HygonGenuine". */ + else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) +diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h +index a0b9b9177c..8995a15f09 100644 +--- a/sysdeps/x86/cpu-features.h ++++ b/sysdeps/x86/cpu-features.h +@@ -295,7 +295,7 @@ extern const struct cpu_features *__get_cpu_features (void) + #define bit_cpu_AVX512_VP2INTERSECT (1u << 8) + #define bit_cpu_INDEX_7_EDX_9 (1u << 9) + #define bit_cpu_MD_CLEAR (1u << 10) +-#define bit_cpu_INDEX_7_EDX_11 (1u << 11) ++#define bit_cpu_RTM_ALWAYS_ABORT (1u << 11) + #define bit_cpu_INDEX_7_EDX_12 (1u << 12) + #define bit_cpu_INDEX_7_EDX_13 (1u << 13) + #define bit_cpu_SERIALIZE (1u << 14) +@@ -508,7 +508,7 @@ extern const struct cpu_features *__get_cpu_features (void) + #define index_cpu_AVX512_VP2INTERSECT COMMON_CPUID_INDEX_7 + #define index_cpu_INDEX_7_EDX_9 COMMON_CPUID_INDEX_7 + #define index_cpu_MD_CLEAR COMMON_CPUID_INDEX_7 +-#define index_cpu_INDEX_7_EDX_11 COMMON_CPUID_INDEX_7 ++#define index_cpu_RTM_ALWAYS_ABORT COMMON_CPUID_INDEX_7 + #define index_cpu_INDEX_7_EDX_12 COMMON_CPUID_INDEX_7 + #define index_cpu_INDEX_7_EDX_13 COMMON_CPUID_INDEX_7 + #define index_cpu_SERIALIZE COMMON_CPUID_INDEX_7 +@@ -721,7 +721,7 @@ extern const struct cpu_features *__get_cpu_features (void) + #define reg_AVX512_VP2INTERSECT edx + #define reg_INDEX_7_EDX_9 edx + #define reg_MD_CLEAR edx +-#define reg_INDEX_7_EDX_11 edx ++#define reg_RTM_ALWAYS_ABORT edx + #define reg_INDEX_7_EDX_12 edx + #define reg_INDEX_7_EDX_13 edx + #define reg_SERIALIZE edx +@@ -804,6 +804,7 @@ extern const struct cpu_features *__get_cpu_features (void) + #define bit_arch_Prefer_FSRM (1u << 13) + #define bit_arch_Prefer_No_AVX512 (1u << 14) + #define bit_arch_MathVec_Prefer_No_AVX512 (1u << 15) ++#define 
bit_arch_Prefer_AVX2_STRCMP (1u << 16) + + #define index_arch_Fast_Rep_String PREFERRED_FEATURE_INDEX_1 + #define index_arch_Fast_Copy_Backward PREFERRED_FEATURE_INDEX_1 +@@ -821,6 +822,7 @@ extern const struct cpu_features *__get_cpu_features (void) + #define index_arch_Prefer_No_AVX512 PREFERRED_FEATURE_INDEX_1 + #define index_arch_MathVec_Prefer_No_AVX512 PREFERRED_FEATURE_INDEX_1 + #define index_arch_Prefer_FSRM PREFERRED_FEATURE_INDEX_1 ++#define index_arch_Prefer_AVX2_STRCMP PREFERRED_FEATURE_INDEX_1 + + /* XCR0 Feature flags. */ + #define bit_XMM_state (1u << 1) +diff --git a/sysdeps/x86/cpu-tunables.c b/sysdeps/x86/cpu-tunables.c +index 588bbf9448..b251a91af3 100644 +--- a/sysdeps/x86/cpu-tunables.c ++++ b/sysdeps/x86/cpu-tunables.c +@@ -238,6 +238,8 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) + CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features, + Fast_Copy_Backward, + disable, 18); ++ CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH ++ (n, cpu_features, Prefer_AVX2_STRCMP, AVX2, disable, 18); + } + break; + case 19: +diff --git a/sysdeps/x86/dl-cet.c b/sysdeps/x86/dl-cet.c +index 03572f7af6..3cc54a8d53 100644 +--- a/sysdeps/x86/dl-cet.c ++++ b/sysdeps/x86/dl-cet.c +@@ -47,7 +47,10 @@ dl_cet_check (struct link_map *m, const char *program) + /* No legacy object check if both IBT and SHSTK are always on. */ + if (enable_ibt_type == cet_always_on + && enable_shstk_type == cet_always_on) +- return; ++ { ++ THREAD_SETMEM (THREAD_SELF, header.feature_1, GL(dl_x86_feature_1)); ++ return; ++ } + + /* Check if IBT is enabled by kernel. 
*/ + bool ibt_enabled +diff --git a/sysdeps/x86/dl-prop.h b/sysdeps/x86/dl-prop.h +index 89911e19e2..4eb3b85a7b 100644 +--- a/sysdeps/x86/dl-prop.h ++++ b/sysdeps/x86/dl-prop.h +@@ -145,15 +145,15 @@ _dl_process_cet_property_note (struct link_map *l, + } + + static inline void __attribute__ ((unused)) +-_dl_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph) ++_dl_process_pt_note (struct link_map *l, int fd, const ElfW(Phdr) *ph) + { + const ElfW(Nhdr) *note = (const void *) (ph->p_vaddr + l->l_addr); + _dl_process_cet_property_note (l, note, ph->p_memsz, ph->p_align); + } + + static inline int __attribute__ ((always_inline)) +-_dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz, +- void *data) ++_dl_process_gnu_property (struct link_map *l, int fd, uint32_t type, ++ uint32_t datasz, void *data) + { + return 0; + } +diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c +index 080c58e70b..527de3b5d9 100644 +--- a/sysdeps/x86/tst-get-cpu-features.c ++++ b/sysdeps/x86/tst-get-cpu-features.c +@@ -183,6 +183,7 @@ do_test (void) + CHECK_CPU_FEATURE (FSRM); + CHECK_CPU_FEATURE (AVX512_VP2INTERSECT); + CHECK_CPU_FEATURE (MD_CLEAR); ++ CHECK_CPU_FEATURE (RTM_ALWAYS_ABORT); + CHECK_CPU_FEATURE (SERIALIZE); + CHECK_CPU_FEATURE (HYBRID); + CHECK_CPU_FEATURE (TSXLDTRK); +@@ -336,6 +337,7 @@ do_test (void) + CHECK_CPU_FEATURE_USABLE (FSRM); + CHECK_CPU_FEATURE_USABLE (AVX512_VP2INTERSECT); + CHECK_CPU_FEATURE_USABLE (MD_CLEAR); ++ CHECK_CPU_FEATURE_USABLE (RTM_ALWAYS_ABORT); + CHECK_CPU_FEATURE_USABLE (SERIALIZE); + CHECK_CPU_FEATURE_USABLE (HYBRID); + CHECK_CPU_FEATURE_USABLE (TSXLDTRK); +diff --git a/sysdeps/x86/tst-memchr-rtm.c b/sysdeps/x86/tst-memchr-rtm.c +new file mode 100644 +index 0000000000..e47494011e +--- /dev/null ++++ b/sysdeps/x86/tst-memchr-rtm.c +@@ -0,0 +1,54 @@ ++/* Test case for memchr inside a transactionally executing RTM region. ++ Copyright (C) 2021 Free Software Foundation, Inc. 
++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <tst-string-rtm.h> ++ ++#define LOOP 3000 ++#define STRING_SIZE 1024 ++char string1[STRING_SIZE]; ++ ++__attribute__ ((noinline, noclone)) ++static int ++prepare (void) ++{ ++ memset (string1, 'a', STRING_SIZE); ++ string1[100] = 'c'; ++ string1[STRING_SIZE - 100] = 'c'; ++ char *p = memchr (string1, 'c', STRING_SIZE); ++ if (p == &string1[100]) ++ return EXIT_SUCCESS; ++ else ++ return EXIT_FAILURE; ++} ++ ++__attribute__ ((noinline, noclone)) ++static int ++function (void) ++{ ++ char *p = memchr (string1, 'c', STRING_SIZE); ++ if (p == &string1[100]) ++ return 0; ++ else ++ return 1; ++} ++ ++static int ++do_test (void) ++{ ++ return do_test_1 ("memchr", LOOP, prepare, function); ++} +diff --git a/sysdeps/x86/tst-memcmp-rtm.c b/sysdeps/x86/tst-memcmp-rtm.c +new file mode 100644 +index 0000000000..e4c8a623bb +--- /dev/null ++++ b/sysdeps/x86/tst-memcmp-rtm.c +@@ -0,0 +1,52 @@ ++/* Test case for memcmp inside a transactionally executing RTM region. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <tst-string-rtm.h> ++ ++#define LOOP 3000 ++#define STRING_SIZE 1024 ++char string1[STRING_SIZE]; ++char string2[STRING_SIZE]; ++ ++__attribute__ ((noinline, noclone)) ++static int ++prepare (void) ++{ ++ memset (string1, 'a', STRING_SIZE); ++ memset (string2, 'a', STRING_SIZE); ++ if (memcmp (string1, string2, STRING_SIZE) == 0) ++ return EXIT_SUCCESS; ++ else ++ return EXIT_FAILURE; ++} ++ ++__attribute__ ((noinline, noclone)) ++static int ++function (void) ++{ ++ if (memcmp (string1, string2, STRING_SIZE) == 0) ++ return 0; ++ else ++ return 1; ++} ++ ++static int ++do_test (void) ++{ ++ return do_test_1 ("memcmp", LOOP, prepare, function); ++} +diff --git a/sysdeps/x86/tst-memmove-rtm.c b/sysdeps/x86/tst-memmove-rtm.c +new file mode 100644 +index 0000000000..4bf97ef1e3 +--- /dev/null ++++ b/sysdeps/x86/tst-memmove-rtm.c +@@ -0,0 +1,53 @@ ++/* Test case for memmove inside a transactionally executing RTM region. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <tst-string-rtm.h> ++ ++#define LOOP 3000 ++#define STRING_SIZE 1024 ++char string1[STRING_SIZE]; ++char string2[STRING_SIZE]; ++ ++__attribute__ ((noinline, noclone)) ++static int ++prepare (void) ++{ ++ memset (string1, 'a', STRING_SIZE); ++ if (memmove (string2, string1, STRING_SIZE) == string2 ++ && memcmp (string2, string1, STRING_SIZE) == 0) ++ return EXIT_SUCCESS; ++ else ++ return EXIT_FAILURE; ++} ++ ++__attribute__ ((noinline, noclone)) ++static int ++function (void) ++{ ++ if (memmove (string2, string1, STRING_SIZE) == string2 ++ && memcmp (string2, string1, STRING_SIZE) == 0) ++ return 0; ++ else ++ return 1; ++} ++ ++static int ++do_test (void) ++{ ++ return do_test_1 ("memmove", LOOP, prepare, function); ++} +diff --git a/sysdeps/x86/tst-memrchr-rtm.c b/sysdeps/x86/tst-memrchr-rtm.c +new file mode 100644 +index 0000000000..a57a5a8eb9 +--- /dev/null ++++ b/sysdeps/x86/tst-memrchr-rtm.c +@@ -0,0 +1,54 @@ ++/* Test case for memrchr inside a transactionally executing RTM region. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <tst-string-rtm.h> ++ ++#define LOOP 3000 ++#define STRING_SIZE 1024 ++char string1[STRING_SIZE]; ++ ++__attribute__ ((noinline, noclone)) ++static int ++prepare (void) ++{ ++ memset (string1, 'a', STRING_SIZE); ++ string1[100] = 'c'; ++ string1[STRING_SIZE - 100] = 'c'; ++ char *p = memrchr (string1, 'c', STRING_SIZE); ++ if (p == &string1[STRING_SIZE - 100]) ++ return EXIT_SUCCESS; ++ else ++ return EXIT_FAILURE; ++} ++ ++__attribute__ ((noinline, noclone)) ++static int ++function (void) ++{ ++ char *p = memrchr (string1, 'c', STRING_SIZE); ++ if (p == &string1[STRING_SIZE - 100]) ++ return 0; ++ else ++ return 1; ++} ++ ++static int ++do_test (void) ++{ ++ return do_test_1 ("memrchr", LOOP, prepare, function); ++} +diff --git a/sysdeps/x86/tst-memset-rtm.c b/sysdeps/x86/tst-memset-rtm.c +new file mode 100644 +index 0000000000..bf343a4dad +--- /dev/null ++++ b/sysdeps/x86/tst-memset-rtm.c +@@ -0,0 +1,45 @@ ++/* Test case for memset inside a transactionally executing RTM region. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <tst-string-rtm.h> ++ ++#define LOOP 3000 ++#define STRING_SIZE 1024 ++char string1[STRING_SIZE]; ++ ++__attribute__ ((noinline, noclone)) ++static int ++prepare (void) ++{ ++ memset (string1, 'a', STRING_SIZE); ++ return EXIT_SUCCESS; ++} ++ ++__attribute__ ((noinline, noclone)) ++static int ++function (void) ++{ ++ memset (string1, 'a', STRING_SIZE); ++ return 0; ++} ++ ++static int ++do_test (void) ++{ ++ return do_test_1 ("memset", LOOP, prepare, function); ++} +diff --git a/sysdeps/x86/tst-setjmp-cet.c b/sysdeps/x86/tst-setjmp-cet.c +new file mode 100644 +index 0000000000..42c795d2a8 +--- /dev/null ++++ b/sysdeps/x86/tst-setjmp-cet.c +@@ -0,0 +1 @@ ++#include <setjmp/tst-setjmp.c> +diff --git a/sysdeps/x86/tst-strchr-rtm.c b/sysdeps/x86/tst-strchr-rtm.c +new file mode 100644 +index 0000000000..a82e29c072 +--- /dev/null ++++ b/sysdeps/x86/tst-strchr-rtm.c +@@ -0,0 +1,54 @@ ++/* Test case for strchr inside a transactionally executing RTM region. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <tst-string-rtm.h> ++ ++#define LOOP 3000 ++#define STRING_SIZE 1024 ++char string1[STRING_SIZE]; ++ ++__attribute__ ((noinline, noclone)) ++static int ++prepare (void) ++{ ++ memset (string1, 'a', STRING_SIZE - 1); ++ string1[100] = 'c'; ++ string1[STRING_SIZE - 100] = 'c'; ++ char *p = strchr (string1, 'c'); ++ if (p == &string1[100]) ++ return EXIT_SUCCESS; ++ else ++ return EXIT_FAILURE; ++} ++ ++__attribute__ ((noinline, noclone)) ++static int ++function (void) ++{ ++ char *p = strchr (string1, 'c'); ++ if (p == &string1[100]) ++ return 0; ++ else ++ return 1; ++} ++ ++static int ++do_test (void) ++{ ++ return do_test_1 ("strchr", LOOP, prepare, function); ++} +diff --git a/sysdeps/x86/tst-strcpy-rtm.c b/sysdeps/x86/tst-strcpy-rtm.c +new file mode 100644 +index 0000000000..2b2a583fb4 +--- /dev/null ++++ b/sysdeps/x86/tst-strcpy-rtm.c +@@ -0,0 +1,53 @@ ++/* Test case for strcpy inside a transactionally executing RTM region. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <tst-string-rtm.h> ++ ++#define LOOP 3000 ++#define STRING_SIZE 1024 ++char string1[STRING_SIZE]; ++char string2[STRING_SIZE]; ++ ++__attribute__ ((noinline, noclone)) ++static int ++prepare (void) ++{ ++ memset (string1, 'a', STRING_SIZE - 1); ++ if (strcpy (string2, string1) == string2 ++ && strcmp (string2, string1) == 0) ++ return EXIT_SUCCESS; ++ else ++ return EXIT_FAILURE; ++} ++ ++__attribute__ ((noinline, noclone)) ++static int ++function (void) ++{ ++ if (strcpy (string2, string1) == string2 ++ && strcmp (string2, string1) == 0) ++ return 0; ++ else ++ return 1; ++} ++ ++static int ++do_test (void) ++{ ++ return do_test_1 ("strcpy", LOOP, prepare, function); ++} +diff --git a/sysdeps/x86/tst-string-rtm.h b/sysdeps/x86/tst-string-rtm.h +new file mode 100644 +index 0000000000..6ed9eca017 +--- /dev/null ++++ b/sysdeps/x86/tst-string-rtm.h +@@ -0,0 +1,72 @@ ++/* Test string function in a transactionally executing RTM region. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <string.h> ++#include <x86intrin.h> ++#include <cpu-features.h> ++#include <support/check.h> ++#include <support/test-driver.h> ++ ++static int ++do_test_1 (const char *name, unsigned int loop, int (*prepare) (void), ++ int (*function) (void)) ++{ ++ if (!CPU_FEATURE_USABLE (RTM)) ++ return EXIT_UNSUPPORTED; ++ ++ int status = prepare (); ++ if (status != EXIT_SUCCESS) ++ return status; ++ ++ unsigned int i; ++ unsigned int naborts = 0; ++ unsigned int failed = 0; ++ for (i = 0; i < loop; i++) ++ { ++ failed |= function (); ++ if (_xbegin() == _XBEGIN_STARTED) ++ { ++ failed |= function (); ++ _xend(); ++ } ++ else ++ { ++ failed |= function (); ++ ++naborts; ++ } ++ } ++ ++ if (failed) ++ FAIL_EXIT1 ("%s() failed", name); ++ ++ if (naborts) ++ { ++ /* NB: Low single digit (<= 5%) noise-level aborts are normal for ++ TSX. */ ++ double rate = 100 * ((double) naborts) / ((double) loop); ++ if (rate > 5) ++ FAIL_EXIT1 ("TSX abort rate: %.2f%% (%d out of %d)", ++ rate, naborts, loop); ++ } ++ ++ return EXIT_SUCCESS; ++} ++ ++static int do_test (void); ++ ++#include <support/test-driver.c> +diff --git a/sysdeps/x86/tst-strlen-rtm.c b/sysdeps/x86/tst-strlen-rtm.c +new file mode 100644 +index 0000000000..0dcf14db87 +--- /dev/null ++++ b/sysdeps/x86/tst-strlen-rtm.c +@@ -0,0 +1,53 @@ ++/* Test case for strlen inside a transactionally executing RTM region. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <tst-string-rtm.h> ++ ++#define LOOP 3000 ++#define STRING_SIZE 1024 ++char string1[STRING_SIZE]; ++ ++__attribute__ ((noinline, noclone)) ++static int ++prepare (void) ++{ ++ memset (string1, 'a', STRING_SIZE - 1); ++ string1[STRING_SIZE - 100] = '\0'; ++ size_t len = strlen (string1); ++ if (len == STRING_SIZE - 100) ++ return EXIT_SUCCESS; ++ else ++ return EXIT_FAILURE; ++} ++ ++__attribute__ ((noinline, noclone)) ++static int ++function (void) ++{ ++ size_t len = strlen (string1); ++ if (len == STRING_SIZE - 100) ++ return 0; ++ else ++ return 1; ++} ++ ++static int ++do_test (void) ++{ ++ return do_test_1 ("strlen", LOOP, prepare, function); ++} +diff --git a/sysdeps/x86/tst-strncmp-rtm.c b/sysdeps/x86/tst-strncmp-rtm.c +new file mode 100644 +index 0000000000..236ad951b5 +--- /dev/null ++++ b/sysdeps/x86/tst-strncmp-rtm.c +@@ -0,0 +1,52 @@ ++/* Test case for strncmp inside a transactionally executing RTM region. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <tst-string-rtm.h> ++ ++#define LOOP 3000 ++#define STRING_SIZE 1024 ++char string1[STRING_SIZE]; ++char string2[STRING_SIZE]; ++ ++__attribute__ ((noinline, noclone)) ++static int ++prepare (void) ++{ ++ memset (string1, 'a', STRING_SIZE - 1); ++ memset (string2, 'a', STRING_SIZE - 1); ++ if (strncmp (string1, string2, STRING_SIZE) == 0) ++ return EXIT_SUCCESS; ++ else ++ return EXIT_FAILURE; ++} ++ ++__attribute__ ((noinline, noclone)) ++static int ++function (void) ++{ ++ if (strncmp (string1, string2, STRING_SIZE) == 0) ++ return 0; ++ else ++ return 1; ++} ++ ++static int ++do_test (void) ++{ ++ return do_test_1 ("strncmp", LOOP, prepare, function); ++} +diff --git a/sysdeps/x86/tst-strrchr-rtm.c b/sysdeps/x86/tst-strrchr-rtm.c +new file mode 100644 +index 0000000000..e32bfaf5f5 +--- /dev/null ++++ b/sysdeps/x86/tst-strrchr-rtm.c +@@ -0,0 +1,53 @@ ++/* Test case for strrchr inside a transactionally executing RTM region. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <tst-string-rtm.h> ++ ++#define LOOP 3000 ++#define STRING_SIZE 1024 ++char string1[STRING_SIZE]; ++ ++__attribute__ ((noinline, noclone)) ++static int ++prepare (void) ++{ ++ memset (string1, 'a', STRING_SIZE - 1); ++ string1[STRING_SIZE - 100] = 'c'; ++ char *p = strrchr (string1, 'c'); ++ if (p == &string1[STRING_SIZE - 100]) ++ return EXIT_SUCCESS; ++ else ++ return EXIT_FAILURE; ++} ++ ++__attribute__ ((noinline, noclone)) ++static int ++function (void) ++{ ++ char *p = strrchr (string1, 'c'); ++ if (p == &string1[STRING_SIZE - 100]) ++ return 0; ++ else ++ return 1; ++} ++ ++static int ++do_test (void) ++{ ++ return do_test_1 ("strrchr", LOOP, prepare, function); ++} +diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile +index 42b97c5cc7..020044da80 100644 +--- a/sysdeps/x86_64/Makefile ++++ b/sysdeps/x86_64/Makefile +@@ -20,6 +20,8 @@ endif + ifeq ($(subdir),string) + sysdep_routines += strcasecmp_l-nonascii strncase_l-nonascii + gen-as-const-headers += locale-defines.sym ++tests += \ ++ tst-rsi-strlen + endif + + ifeq ($(subdir),elf) +@@ -150,6 +152,11 @@ ifeq ($(subdir),csu) + gen-as-const-headers += tlsdesc.sym rtld-offsets.sym + endif + ++ifeq ($(subdir),wcsmbs) ++tests += \ ++ tst-rsi-wcslen ++endif ++ + $(objpfx)x86_64/tst-x86_64mod-1.os: $(objpfx)tst-x86_64mod-1.os + $(make-target-directory) + rm -f $@ +diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure +old mode 100644 +new mode 100755 +index 84f82c2406..fc1840e23f +--- a/sysdeps/x86_64/configure ++++ b/sysdeps/x86_64/configure +@@ -107,39 +107,6 @@ if test x"$build_mathvec" = xnotset; then + build_mathvec=yes + fi + +-if test "$static_pie" = yes; then +- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for linker static PIE support" >&5 +-$as_echo_n "checking for linker static PIE support... 
" >&6; } +-if ${libc_cv_ld_static_pie+:} false; then : +- $as_echo_n "(cached) " >&6 +-else +- cat > conftest.s <<\EOF +- .text +- .global _start +- .weak foo +-_start: +- leaq foo(%rip), %rax +-EOF +- libc_cv_pie_option="-Wl,-pie" +- if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&5' +- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 +- (eval $ac_try) 2>&5 +- ac_status=$? +- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 +- test $ac_status = 0; }; }; then +- libc_cv_ld_static_pie=yes +- else +- libc_cv_ld_static_pie=no +- fi +-rm -f conftest* +-fi +-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ld_static_pie" >&5 +-$as_echo "$libc_cv_ld_static_pie" >&6; } +- if test "$libc_cv_ld_static_pie" != yes; then +- as_fn_error $? "linker support for static PIE needed" "$LINENO" 5 +- fi +-fi +- + $as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h + + +diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac +index cdaba0c075..611a7d9ba3 100644 +--- a/sysdeps/x86_64/configure.ac ++++ b/sysdeps/x86_64/configure.ac +@@ -53,31 +53,6 @@ if test x"$build_mathvec" = xnotset; then + build_mathvec=yes + fi + +-dnl Check if linker supports static PIE with the fix for +-dnl +-dnl https://sourceware.org/bugzilla/show_bug.cgi?id=21782 +-dnl +-if test "$static_pie" = yes; then +- AC_CACHE_CHECK(for linker static PIE support, libc_cv_ld_static_pie, [dnl +-cat > conftest.s <<\EOF +- .text +- .global _start +- .weak foo +-_start: +- leaq foo(%rip), %rax +-EOF +- libc_cv_pie_option="-Wl,-pie" +- if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&AS_MESSAGE_LOG_FD); then +- libc_cv_ld_static_pie=yes +- else +- libc_cv_ld_static_pie=no +- fi +-rm -f conftest*]) +- if test "$libc_cv_ld_static_pie" != yes; then +- AC_MSG_ERROR([linker support for static PIE needed]) +- fi 
+-fi +- + dnl It is always possible to access static and hidden symbols in an + dnl position independent way. + AC_DEFINE(PI_STATIC_AND_HIDDEN) +diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h +index ca73d8fef9..363a749cb2 100644 +--- a/sysdeps/x86_64/dl-machine.h ++++ b/sysdeps/x86_64/dl-machine.h +@@ -315,16 +315,22 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, + { + # ifndef RTLD_BOOTSTRAP + if (sym_map != map +- && sym_map->l_type != lt_executable + && !sym_map->l_relocated) + { + const char *strtab + = (const char *) D_PTR (map, l_info[DT_STRTAB]); +- _dl_error_printf ("\ ++ if (sym_map->l_type == lt_executable) ++ _dl_fatal_printf ("\ ++%s: IFUNC symbol '%s' referenced in '%s' is defined in the executable \ ++and creates an unsatisfiable circular dependency.\n", ++ RTLD_PROGNAME, strtab + refsym->st_name, ++ map->l_name); ++ else ++ _dl_error_printf ("\ + %s: Relink `%s' with `%s' for IFUNC symbol `%s'\n", +- RTLD_PROGNAME, map->l_name, +- sym_map->l_name, +- strtab + refsym->st_name); ++ RTLD_PROGNAME, map->l_name, ++ sym_map->l_name, ++ strtab + refsym->st_name); + } + # endif + value = ((ElfW(Addr) (*) (void)) value) (); +diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h b/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h +index 7659758972..e5fd5ac9cb 100644 +--- a/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h ++++ b/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h +@@ -32,7 +32,7 @@ IFUNC_SELECTOR (void) + && CPU_FEATURE_USABLE_P (cpu_features, AVX2)) + return OPTIMIZE (fma); + +- if (CPU_FEATURE_USABLE_P (cpu_features, FMA)) ++ if (CPU_FEATURE_USABLE_P (cpu_features, FMA4)) + return OPTIMIZE (fma4); + + return OPTIMIZE (sse2); +diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S +index a5c879d2af..070e5ef90b 100644 +--- a/sysdeps/x86_64/memchr.S ++++ b/sysdeps/x86_64/memchr.S +@@ -21,9 +21,11 @@ + #ifdef USE_AS_WMEMCHR + # define MEMCHR wmemchr + # define PCMPEQ pcmpeqd ++# define CHAR_PER_VEC 4 + #else + # 
define MEMCHR memchr + # define PCMPEQ pcmpeqb ++# define CHAR_PER_VEC 16 + #endif + + /* fast SSE2 version with using pmaxub and 64 byte loop */ +@@ -33,15 +35,14 @@ ENTRY(MEMCHR) + movd %esi, %xmm1 + mov %edi, %ecx + ++#ifdef __ILP32__ ++ /* Clear the upper 32 bits. */ ++ movl %edx, %edx ++#endif + #ifdef USE_AS_WMEMCHR + test %RDX_LP, %RDX_LP + jz L(return_null) +- shl $2, %RDX_LP + #else +-# ifdef __ILP32__ +- /* Clear the upper 32 bits. */ +- movl %edx, %edx +-# endif + punpcklbw %xmm1, %xmm1 + test %RDX_LP, %RDX_LP + jz L(return_null) +@@ -60,13 +61,16 @@ ENTRY(MEMCHR) + test %eax, %eax + + jnz L(matches_1) +- sub $16, %rdx ++ sub $CHAR_PER_VEC, %rdx + jbe L(return_null) + add $16, %rdi + and $15, %ecx + and $-16, %rdi ++#ifdef USE_AS_WMEMCHR ++ shr $2, %ecx ++#endif + add %rcx, %rdx +- sub $64, %rdx ++ sub $(CHAR_PER_VEC * 4), %rdx + jbe L(exit_loop) + jmp L(loop_prolog) + +@@ -77,16 +81,21 @@ L(crosscache): + movdqa (%rdi), %xmm0 + + PCMPEQ %xmm1, %xmm0 +-/* Check if there is a match. */ ++ /* Check if there is a match. */ + pmovmskb %xmm0, %eax +-/* Remove the leading bytes. */ ++ /* Remove the leading bytes. */ + sar %cl, %eax + test %eax, %eax + je L(unaligned_no_match) +-/* Check which byte is a match. */ ++ /* Check which byte is a match. */ + bsf %eax, %eax +- ++#ifdef USE_AS_WMEMCHR ++ mov %eax, %esi ++ shr $2, %esi ++ sub %rsi, %rdx ++#else + sub %rax, %rdx ++#endif + jbe L(return_null) + add %rdi, %rax + add %rcx, %rax +@@ -94,15 +103,18 @@ L(crosscache): + + .p2align 4 + L(unaligned_no_match): +- /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using ++ /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using + "rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to void + possible addition overflow. 
*/ + neg %rcx + add $16, %rcx ++#ifdef USE_AS_WMEMCHR ++ shr $2, %ecx ++#endif + sub %rcx, %rdx + jbe L(return_null) + add $16, %rdi +- sub $64, %rdx ++ sub $(CHAR_PER_VEC * 4), %rdx + jbe L(exit_loop) + + .p2align 4 +@@ -135,7 +147,7 @@ L(loop_prolog): + test $0x3f, %rdi + jz L(align64_loop) + +- sub $64, %rdx ++ sub $(CHAR_PER_VEC * 4), %rdx + jbe L(exit_loop) + + movdqa (%rdi), %xmm0 +@@ -167,11 +179,14 @@ L(loop_prolog): + mov %rdi, %rcx + and $-64, %rdi + and $63, %ecx ++#ifdef USE_AS_WMEMCHR ++ shr $2, %ecx ++#endif + add %rcx, %rdx + + .p2align 4 + L(align64_loop): +- sub $64, %rdx ++ sub $(CHAR_PER_VEC * 4), %rdx + jbe L(exit_loop) + movdqa (%rdi), %xmm0 + movdqa 16(%rdi), %xmm2 +@@ -218,7 +233,7 @@ L(align64_loop): + + .p2align 4 + L(exit_loop): +- add $32, %edx ++ add $(CHAR_PER_VEC * 2), %edx + jle L(exit_loop_32) + + movdqa (%rdi), %xmm0 +@@ -238,7 +253,7 @@ L(exit_loop): + pmovmskb %xmm3, %eax + test %eax, %eax + jnz L(matches32_1) +- sub $16, %edx ++ sub $CHAR_PER_VEC, %edx + jle L(return_null) + + PCMPEQ 48(%rdi), %xmm1 +@@ -250,13 +265,13 @@ L(exit_loop): + + .p2align 4 + L(exit_loop_32): +- add $32, %edx ++ add $(CHAR_PER_VEC * 2), %edx + movdqa (%rdi), %xmm0 + PCMPEQ %xmm1, %xmm0 + pmovmskb %xmm0, %eax + test %eax, %eax + jnz L(matches_1) +- sub $16, %edx ++ sub $CHAR_PER_VEC, %edx + jbe L(return_null) + + PCMPEQ 16(%rdi), %xmm1 +@@ -293,7 +308,13 @@ L(matches32): + .p2align 4 + L(matches_1): + bsf %eax, %eax ++#ifdef USE_AS_WMEMCHR ++ mov %eax, %esi ++ shr $2, %esi ++ sub %rsi, %rdx ++#else + sub %rax, %rdx ++#endif + jbe L(return_null) + add %rdi, %rax + ret +@@ -301,7 +322,13 @@ L(matches_1): + .p2align 4 + L(matches16_1): + bsf %eax, %eax ++#ifdef USE_AS_WMEMCHR ++ mov %eax, %esi ++ shr $2, %esi ++ sub %rsi, %rdx ++#else + sub %rax, %rdx ++#endif + jbe L(return_null) + lea 16(%rdi, %rax), %rax + ret +@@ -309,7 +336,13 @@ L(matches16_1): + .p2align 4 + L(matches32_1): + bsf %eax, %eax ++#ifdef USE_AS_WMEMCHR ++ mov %eax, %esi ++ shr $2, %esi ++ 
sub %rsi, %rdx ++#else + sub %rax, %rdx ++#endif + jbe L(return_null) + lea 32(%rdi, %rax), %rax + ret +@@ -317,7 +350,13 @@ L(matches32_1): + .p2align 4 + L(matches48_1): + bsf %eax, %eax ++#ifdef USE_AS_WMEMCHR ++ mov %eax, %esi ++ shr $2, %esi ++ sub %rsi, %rdx ++#else + sub %rax, %rdx ++#endif + jbe L(return_null) + lea 48(%rdi, %rax), %rax + ret +diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile +index 395e432c09..da1446d731 100644 +--- a/sysdeps/x86_64/multiarch/Makefile ++++ b/sysdeps/x86_64/multiarch/Makefile +@@ -43,7 +43,45 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c \ + memmove-avx512-unaligned-erms \ + memset-sse2-unaligned-erms \ + memset-avx2-unaligned-erms \ +- memset-avx512-unaligned-erms ++ memset-avx512-unaligned-erms \ ++ memchr-avx2-rtm \ ++ memcmp-avx2-movbe-rtm \ ++ memmove-avx-unaligned-erms-rtm \ ++ memrchr-avx2-rtm \ ++ memset-avx2-unaligned-erms-rtm \ ++ rawmemchr-avx2-rtm \ ++ strchr-avx2-rtm \ ++ strcmp-avx2-rtm \ ++ strchrnul-avx2-rtm \ ++ stpcpy-avx2-rtm \ ++ stpncpy-avx2-rtm \ ++ strcat-avx2-rtm \ ++ strcpy-avx2-rtm \ ++ strlen-avx2-rtm \ ++ strncat-avx2-rtm \ ++ strncmp-avx2-rtm \ ++ strncpy-avx2-rtm \ ++ strnlen-avx2-rtm \ ++ strrchr-avx2-rtm \ ++ memchr-evex \ ++ memcmp-evex-movbe \ ++ memmove-evex-unaligned-erms \ ++ memrchr-evex \ ++ memset-evex-unaligned-erms \ ++ rawmemchr-evex \ ++ stpcpy-evex \ ++ stpncpy-evex \ ++ strcat-evex \ ++ strchr-evex \ ++ strchrnul-evex \ ++ strcmp-evex \ ++ strcpy-evex \ ++ strlen-evex \ ++ strncat-evex \ ++ strncmp-evex \ ++ strncpy-evex \ ++ strnlen-evex \ ++ strrchr-evex + CFLAGS-varshift.c += -msse4 + CFLAGS-strcspn-c.c += -msse4 + CFLAGS-strpbrk-c.c += -msse4 +@@ -59,8 +97,24 @@ sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \ + wcscpy-ssse3 wcscpy-c \ + wcschr-sse2 wcschr-avx2 \ + wcsrchr-sse2 wcsrchr-avx2 \ +- wcsnlen-sse4_1 wcsnlen-c \ +- wcslen-sse2 wcslen-avx2 wcsnlen-avx2 ++ wcslen-sse2 wcslen-sse4_1 wcslen-avx2 \ ++ wcsnlen-c 
wcsnlen-sse4_1 wcsnlen-avx2 \ ++ wcschr-avx2-rtm \ ++ wcscmp-avx2-rtm \ ++ wcslen-avx2-rtm \ ++ wcsncmp-avx2-rtm \ ++ wcsnlen-avx2-rtm \ ++ wcsrchr-avx2-rtm \ ++ wmemchr-avx2-rtm \ ++ wmemcmp-avx2-movbe-rtm \ ++ wcschr-evex \ ++ wcscmp-evex \ ++ wcslen-evex \ ++ wcsncmp-evex \ ++ wcsnlen-evex \ ++ wcsrchr-evex \ ++ wmemchr-evex \ ++ wmemcmp-evex-movbe + endif + + ifeq ($(subdir),debug) +diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h +index f4e311d470..f450c786f0 100644 +--- a/sysdeps/x86_64/multiarch/ifunc-avx2.h ++++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h +@@ -21,16 +21,28 @@ + + extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + + static inline void * + IFUNC_SELECTOR (void) + { + const struct cpu_features* cpu_features = __get_cpu_features (); + +- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) +- && CPU_FEATURE_USABLE_P (cpu_features, AVX2) ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) + && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) +- return OPTIMIZE (avx2); ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) ++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) ++ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)) ++ return OPTIMIZE (evex); ++ ++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) ++ return OPTIMIZE (avx2_rtm); ++ ++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) ++ return OPTIMIZE (avx2); ++ } + + return OPTIMIZE (sse2); + } +diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c +index f93ec39d98..920e64241e 100644 +--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c +@@ -43,6 +43,15 @@ __libc_ifunc_impl_list (const char *name, struct 
libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, memchr, + CPU_FEATURE_USABLE (AVX2), + __memchr_avx2) ++ IFUNC_IMPL_ADD (array, i, memchr, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __memchr_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, memchr, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __memchr_evex) + IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_sse2)) + + /* Support sysdeps/x86_64/multiarch/memcmp.c. */ +@@ -51,6 +60,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (MOVBE)), + __memcmp_avx2_movbe) ++ IFUNC_IMPL_ADD (array, i, memcmp, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (MOVBE) ++ && CPU_FEATURE_USABLE (RTM)), ++ __memcmp_avx2_movbe_rtm) ++ IFUNC_IMPL_ADD (array, i, memcmp, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (MOVBE)), ++ __memcmp_evex_movbe) + IFUNC_IMPL_ADD (array, i, memcmp, CPU_FEATURE_USABLE (SSE4_1), + __memcmp_sse4_1) + IFUNC_IMPL_ADD (array, i, memcmp, CPU_FEATURE_USABLE (SSSE3), +@@ -64,10 +83,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + CPU_FEATURE_USABLE (AVX512F), + __memmove_chk_avx512_no_vzeroupper) + IFUNC_IMPL_ADD (array, i, __memmove_chk, +- CPU_FEATURE_USABLE (AVX512F), ++ CPU_FEATURE_USABLE (AVX512VL), + __memmove_chk_avx512_unaligned) + IFUNC_IMPL_ADD (array, i, __memmove_chk, +- CPU_FEATURE_USABLE (AVX512F), ++ CPU_FEATURE_USABLE (AVX512VL), + __memmove_chk_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memmove_chk, + CPU_FEATURE_USABLE (AVX), +@@ -75,6 +94,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, __memmove_chk, + CPU_FEATURE_USABLE (AVX), + __memmove_chk_avx_unaligned_erms) ++ IFUNC_IMPL_ADD (array, i, __memmove_chk, ++ (CPU_FEATURE_USABLE (AVX) ++ && CPU_FEATURE_USABLE (RTM)), ++ 
__memmove_chk_avx_unaligned_rtm) ++ IFUNC_IMPL_ADD (array, i, __memmove_chk, ++ (CPU_FEATURE_USABLE (AVX) ++ && CPU_FEATURE_USABLE (RTM)), ++ __memmove_chk_avx_unaligned_erms_rtm) ++ IFUNC_IMPL_ADD (array, i, __memmove_chk, ++ CPU_FEATURE_USABLE (AVX512VL), ++ __memmove_chk_evex_unaligned) ++ IFUNC_IMPL_ADD (array, i, __memmove_chk, ++ CPU_FEATURE_USABLE (AVX512VL), ++ __memmove_chk_evex_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memmove_chk, + CPU_FEATURE_USABLE (SSSE3), + __memmove_chk_ssse3_back) +@@ -97,14 +130,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, memmove, + CPU_FEATURE_USABLE (AVX), + __memmove_avx_unaligned_erms) ++ IFUNC_IMPL_ADD (array, i, memmove, ++ (CPU_FEATURE_USABLE (AVX) ++ && CPU_FEATURE_USABLE (RTM)), ++ __memmove_avx_unaligned_rtm) ++ IFUNC_IMPL_ADD (array, i, memmove, ++ (CPU_FEATURE_USABLE (AVX) ++ && CPU_FEATURE_USABLE (RTM)), ++ __memmove_avx_unaligned_erms_rtm) ++ IFUNC_IMPL_ADD (array, i, memmove, ++ CPU_FEATURE_USABLE (AVX512VL), ++ __memmove_evex_unaligned) ++ IFUNC_IMPL_ADD (array, i, memmove, ++ CPU_FEATURE_USABLE (AVX512VL), ++ __memmove_evex_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memmove, + CPU_FEATURE_USABLE (AVX512F), + __memmove_avx512_no_vzeroupper) + IFUNC_IMPL_ADD (array, i, memmove, +- CPU_FEATURE_USABLE (AVX512F), ++ CPU_FEATURE_USABLE (AVX512VL), + __memmove_avx512_unaligned) + IFUNC_IMPL_ADD (array, i, memmove, +- CPU_FEATURE_USABLE (AVX512F), ++ CPU_FEATURE_USABLE (AVX512VL), + __memmove_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memmove, CPU_FEATURE_USABLE (SSSE3), + __memmove_ssse3_back) +@@ -121,6 +168,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, memrchr, + CPU_FEATURE_USABLE (AVX2), + __memrchr_avx2) ++ IFUNC_IMPL_ADD (array, i, memrchr, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __memrchr_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, memrchr, ++ 
(CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), ++ __memrchr_evex) ++ + IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_sse2)) + + #ifdef SHARED +@@ -139,10 +195,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + CPU_FEATURE_USABLE (AVX2), + __memset_chk_avx2_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memset_chk, +- CPU_FEATURE_USABLE (AVX512F), ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __memset_chk_avx2_unaligned_rtm) ++ IFUNC_IMPL_ADD (array, i, __memset_chk, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __memset_chk_avx2_unaligned_erms_rtm) ++ IFUNC_IMPL_ADD (array, i, __memset_chk, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), ++ __memset_chk_evex_unaligned) ++ IFUNC_IMPL_ADD (array, i, __memset_chk, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), ++ __memset_chk_evex_unaligned_erms) ++ IFUNC_IMPL_ADD (array, i, __memset_chk, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), + __memset_chk_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memset_chk, +- CPU_FEATURE_USABLE (AVX512F), ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), + __memset_chk_avx512_unaligned) + IFUNC_IMPL_ADD (array, i, __memset_chk, + CPU_FEATURE_USABLE (AVX512F), +@@ -164,10 +238,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + CPU_FEATURE_USABLE (AVX2), + __memset_avx2_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memset, +- CPU_FEATURE_USABLE (AVX512F), ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __memset_avx2_unaligned_rtm) ++ IFUNC_IMPL_ADD (array, i, memset, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __memset_avx2_unaligned_erms_rtm) ++ IFUNC_IMPL_ADD (array, i, memset, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), ++ __memset_evex_unaligned) ++ IFUNC_IMPL_ADD (array, i, memset, ++ 
(CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), ++ __memset_evex_unaligned_erms) ++ IFUNC_IMPL_ADD (array, i, memset, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), + __memset_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memset, +- CPU_FEATURE_USABLE (AVX512F), ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), + __memset_avx512_unaligned) + IFUNC_IMPL_ADD (array, i, memset, + CPU_FEATURE_USABLE (AVX512F), +@@ -179,20 +271,51 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, rawmemchr, + CPU_FEATURE_USABLE (AVX2), + __rawmemchr_avx2) ++ IFUNC_IMPL_ADD (array, i, rawmemchr, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __rawmemchr_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, rawmemchr, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __rawmemchr_evex) + IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_sse2)) + + /* Support sysdeps/x86_64/multiarch/strlen.c. */ + IFUNC_IMPL (i, name, strlen, + IFUNC_IMPL_ADD (array, i, strlen, +- CPU_FEATURE_USABLE (AVX2), ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (BMI2)), + __strlen_avx2) ++ IFUNC_IMPL_ADD (array, i, strlen, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (BMI2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __strlen_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, strlen, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __strlen_evex) + IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2)) + + /* Support sysdeps/x86_64/multiarch/strnlen.c. 
*/ + IFUNC_IMPL (i, name, strnlen, + IFUNC_IMPL_ADD (array, i, strnlen, +- CPU_FEATURE_USABLE (AVX2), ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (BMI2)), + __strnlen_avx2) ++ IFUNC_IMPL_ADD (array, i, strnlen, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (BMI2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __strnlen_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, strnlen, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __strnlen_evex) + IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2)) + + /* Support sysdeps/x86_64/multiarch/stpncpy.c. */ +@@ -201,6 +324,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + __stpncpy_ssse3) + IFUNC_IMPL_ADD (array, i, stpncpy, CPU_FEATURE_USABLE (AVX2), + __stpncpy_avx2) ++ IFUNC_IMPL_ADD (array, i, stpncpy, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __stpncpy_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, stpncpy, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), ++ __stpncpy_evex) + IFUNC_IMPL_ADD (array, i, stpncpy, 1, + __stpncpy_sse2_unaligned) + IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2)) +@@ -211,6 +342,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + __stpcpy_ssse3) + IFUNC_IMPL_ADD (array, i, stpcpy, CPU_FEATURE_USABLE (AVX2), + __stpcpy_avx2) ++ IFUNC_IMPL_ADD (array, i, stpcpy, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __stpcpy_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, stpcpy, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), ++ __stpcpy_evex) + IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2_unaligned) + IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2)) + +@@ -245,6 +384,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL (i, name, strcat, + IFUNC_IMPL_ADD (array, i, strcat, CPU_FEATURE_USABLE (AVX2), + __strcat_avx2) ++ IFUNC_IMPL_ADD (array, i, strcat, ++ 
(CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __strcat_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, strcat, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), ++ __strcat_evex) + IFUNC_IMPL_ADD (array, i, strcat, CPU_FEATURE_USABLE (SSSE3), + __strcat_ssse3) + IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2_unaligned) +@@ -255,6 +402,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strchr, + CPU_FEATURE_USABLE (AVX2), + __strchr_avx2) ++ IFUNC_IMPL_ADD (array, i, strchr, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __strchr_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, strchr, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __strchr_evex) + IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf) + IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2)) + +@@ -263,6 +419,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strchrnul, + CPU_FEATURE_USABLE (AVX2), + __strchrnul_avx2) ++ IFUNC_IMPL_ADD (array, i, strchrnul, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __strchrnul_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, strchrnul, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __strchrnul_evex) + IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2)) + + /* Support sysdeps/x86_64/multiarch/strrchr.c. 
*/ +@@ -270,6 +435,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strrchr, + CPU_FEATURE_USABLE (AVX2), + __strrchr_avx2) ++ IFUNC_IMPL_ADD (array, i, strrchr, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __strrchr_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, strrchr, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), ++ __strrchr_evex) + IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2)) + + /* Support sysdeps/x86_64/multiarch/strcmp.c. */ +@@ -277,6 +450,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strcmp, + CPU_FEATURE_USABLE (AVX2), + __strcmp_avx2) ++ IFUNC_IMPL_ADD (array, i, strcmp, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __strcmp_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, strcmp, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __strcmp_evex) + IFUNC_IMPL_ADD (array, i, strcmp, CPU_FEATURE_USABLE (SSE4_2), + __strcmp_sse42) + IFUNC_IMPL_ADD (array, i, strcmp, CPU_FEATURE_USABLE (SSSE3), +@@ -288,6 +470,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL (i, name, strcpy, + IFUNC_IMPL_ADD (array, i, strcpy, CPU_FEATURE_USABLE (AVX2), + __strcpy_avx2) ++ IFUNC_IMPL_ADD (array, i, strcpy, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __strcpy_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, strcpy, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), ++ __strcpy_evex) + IFUNC_IMPL_ADD (array, i, strcpy, CPU_FEATURE_USABLE (SSSE3), + __strcpy_ssse3) + IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_sse2_unaligned) +@@ -331,6 +521,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL (i, name, strncat, + IFUNC_IMPL_ADD (array, i, strncat, CPU_FEATURE_USABLE (AVX2), + __strncat_avx2) ++ IFUNC_IMPL_ADD (array, i, strncat, 
++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __strncat_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, strncat, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), ++ __strncat_evex) + IFUNC_IMPL_ADD (array, i, strncat, CPU_FEATURE_USABLE (SSSE3), + __strncat_ssse3) + IFUNC_IMPL_ADD (array, i, strncat, 1, +@@ -341,6 +539,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL (i, name, strncpy, + IFUNC_IMPL_ADD (array, i, strncpy, CPU_FEATURE_USABLE (AVX2), + __strncpy_avx2) ++ IFUNC_IMPL_ADD (array, i, strncpy, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __strncpy_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, strncpy, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), ++ __strncpy_evex) + IFUNC_IMPL_ADD (array, i, strncpy, CPU_FEATURE_USABLE (SSSE3), + __strncpy_ssse3) + IFUNC_IMPL_ADD (array, i, strncpy, 1, +@@ -370,6 +576,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, wcschr, + CPU_FEATURE_USABLE (AVX2), + __wcschr_avx2) ++ IFUNC_IMPL_ADD (array, i, wcschr, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __wcschr_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, wcschr, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __wcschr_evex) + IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2)) + + /* Support sysdeps/x86_64/multiarch/wcsrchr.c. 
*/ +@@ -377,6 +592,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, wcsrchr, + CPU_FEATURE_USABLE (AVX2), + __wcsrchr_avx2) ++ IFUNC_IMPL_ADD (array, i, wcsrchr, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __wcsrchr_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, wcsrchr, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __wcsrchr_evex) + IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2)) + + /* Support sysdeps/x86_64/multiarch/wcscmp.c. */ +@@ -384,6 +608,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, wcscmp, + CPU_FEATURE_USABLE (AVX2), + __wcscmp_avx2) ++ IFUNC_IMPL_ADD (array, i, wcscmp, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __wcscmp_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, wcscmp, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __wcscmp_evex) + IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_sse2)) + + /* Support sysdeps/x86_64/multiarch/wcsncmp.c. */ +@@ -391,6 +624,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, wcsncmp, + CPU_FEATURE_USABLE (AVX2), + __wcsncmp_avx2) ++ IFUNC_IMPL_ADD (array, i, wcsncmp, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __wcsncmp_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, wcsncmp, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __wcsncmp_evex) + IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_sse2)) + + /* Support sysdeps/x86_64/multiarch/wcscpy.c. */ +@@ -402,15 +644,40 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + /* Support sysdeps/x86_64/multiarch/wcslen.c. 
*/ + IFUNC_IMPL (i, name, wcslen, + IFUNC_IMPL_ADD (array, i, wcslen, +- CPU_FEATURE_USABLE (AVX2), ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (BMI2)), + __wcslen_avx2) ++ IFUNC_IMPL_ADD (array, i, wcslen, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (BMI2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __wcslen_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, wcslen, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __wcslen_evex) ++ IFUNC_IMPL_ADD (array, i, wcslen, ++ CPU_FEATURE_USABLE (SSE4_1), ++ __wcslen_sse4_1) + IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_sse2)) + + /* Support sysdeps/x86_64/multiarch/wcsnlen.c. */ + IFUNC_IMPL (i, name, wcsnlen, + IFUNC_IMPL_ADD (array, i, wcsnlen, +- CPU_FEATURE_USABLE (AVX2), ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (BMI2)), + __wcsnlen_avx2) ++ IFUNC_IMPL_ADD (array, i, wcsnlen, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (BMI2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __wcsnlen_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, wcsnlen, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __wcsnlen_evex) + IFUNC_IMPL_ADD (array, i, wcsnlen, + CPU_FEATURE_USABLE (SSE4_1), + __wcsnlen_sse4_1) +@@ -421,6 +688,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, wmemchr, + CPU_FEATURE_USABLE (AVX2), + __wmemchr_avx2) ++ IFUNC_IMPL_ADD (array, i, wmemchr, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __wmemchr_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, wmemchr, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __wmemchr_evex) + IFUNC_IMPL_ADD (array, i, wmemchr, 1, __wmemchr_sse2)) + + /* Support sysdeps/x86_64/multiarch/wmemcmp.c. 
*/ +@@ -429,6 +705,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (MOVBE)), + __wmemcmp_avx2_movbe) ++ IFUNC_IMPL_ADD (array, i, wmemcmp, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (MOVBE) ++ && CPU_FEATURE_USABLE (RTM)), ++ __wmemcmp_avx2_movbe_rtm) ++ IFUNC_IMPL_ADD (array, i, wmemcmp, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (MOVBE)), ++ __wmemcmp_evex_movbe) + IFUNC_IMPL_ADD (array, i, wmemcmp, CPU_FEATURE_USABLE (SSE4_1), + __wmemcmp_sse4_1) + IFUNC_IMPL_ADD (array, i, wmemcmp, CPU_FEATURE_USABLE (SSSE3), +@@ -443,7 +729,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + CPU_FEATURE_USABLE (AVX2), + __wmemset_avx2_unaligned) + IFUNC_IMPL_ADD (array, i, wmemset, +- CPU_FEATURE_USABLE (AVX512F), ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __wmemset_avx2_unaligned_rtm) ++ IFUNC_IMPL_ADD (array, i, wmemset, ++ CPU_FEATURE_USABLE (AVX512VL), ++ __wmemset_evex_unaligned) ++ IFUNC_IMPL_ADD (array, i, wmemset, ++ CPU_FEATURE_USABLE (AVX512VL), + __wmemset_avx512_unaligned)) + + #ifdef SHARED +@@ -453,10 +746,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + CPU_FEATURE_USABLE (AVX512F), + __memcpy_chk_avx512_no_vzeroupper) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, +- CPU_FEATURE_USABLE (AVX512F), ++ CPU_FEATURE_USABLE (AVX512VL), + __memcpy_chk_avx512_unaligned) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, +- CPU_FEATURE_USABLE (AVX512F), ++ CPU_FEATURE_USABLE (AVX512VL), + __memcpy_chk_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + CPU_FEATURE_USABLE (AVX), +@@ -464,6 +757,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + CPU_FEATURE_USABLE (AVX), + __memcpy_chk_avx_unaligned_erms) ++ IFUNC_IMPL_ADD (array, i, __memcpy_chk, ++ (CPU_FEATURE_USABLE 
(AVX) ++ && CPU_FEATURE_USABLE (RTM)), ++ __memcpy_chk_avx_unaligned_rtm) ++ IFUNC_IMPL_ADD (array, i, __memcpy_chk, ++ (CPU_FEATURE_USABLE (AVX) ++ && CPU_FEATURE_USABLE (RTM)), ++ __memcpy_chk_avx_unaligned_erms_rtm) ++ IFUNC_IMPL_ADD (array, i, __memcpy_chk, ++ CPU_FEATURE_USABLE (AVX512VL), ++ __memcpy_chk_evex_unaligned) ++ IFUNC_IMPL_ADD (array, i, __memcpy_chk, ++ CPU_FEATURE_USABLE (AVX512VL), ++ __memcpy_chk_evex_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + CPU_FEATURE_USABLE (SSSE3), + __memcpy_chk_ssse3_back) +@@ -486,6 +793,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, memcpy, + CPU_FEATURE_USABLE (AVX), + __memcpy_avx_unaligned_erms) ++ IFUNC_IMPL_ADD (array, i, memcpy, ++ (CPU_FEATURE_USABLE (AVX) ++ && CPU_FEATURE_USABLE (RTM)), ++ __memcpy_avx_unaligned_rtm) ++ IFUNC_IMPL_ADD (array, i, memcpy, ++ (CPU_FEATURE_USABLE (AVX) ++ && CPU_FEATURE_USABLE (RTM)), ++ __memcpy_avx_unaligned_erms_rtm) ++ IFUNC_IMPL_ADD (array, i, memcpy, ++ CPU_FEATURE_USABLE (AVX512VL), ++ __memcpy_evex_unaligned) ++ IFUNC_IMPL_ADD (array, i, memcpy, ++ CPU_FEATURE_USABLE (AVX512VL), ++ __memcpy_evex_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memcpy, CPU_FEATURE_USABLE (SSSE3), + __memcpy_ssse3_back) + IFUNC_IMPL_ADD (array, i, memcpy, CPU_FEATURE_USABLE (SSSE3), +@@ -494,10 +815,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + CPU_FEATURE_USABLE (AVX512F), + __memcpy_avx512_no_vzeroupper) + IFUNC_IMPL_ADD (array, i, memcpy, +- CPU_FEATURE_USABLE (AVX512F), ++ CPU_FEATURE_USABLE (AVX512VL), + __memcpy_avx512_unaligned) + IFUNC_IMPL_ADD (array, i, memcpy, +- CPU_FEATURE_USABLE (AVX512F), ++ CPU_FEATURE_USABLE (AVX512VL), + __memcpy_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned) + IFUNC_IMPL_ADD (array, i, memcpy, 1, +@@ -511,10 +832,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + CPU_FEATURE_USABLE 
(AVX512F), + __mempcpy_chk_avx512_no_vzeroupper) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, +- CPU_FEATURE_USABLE (AVX512F), ++ CPU_FEATURE_USABLE (AVX512VL), + __mempcpy_chk_avx512_unaligned) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, +- CPU_FEATURE_USABLE (AVX512F), ++ CPU_FEATURE_USABLE (AVX512VL), + __mempcpy_chk_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + CPU_FEATURE_USABLE (AVX), +@@ -522,6 +843,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + CPU_FEATURE_USABLE (AVX), + __mempcpy_chk_avx_unaligned_erms) ++ IFUNC_IMPL_ADD (array, i, __mempcpy_chk, ++ (CPU_FEATURE_USABLE (AVX) ++ && CPU_FEATURE_USABLE (RTM)), ++ __mempcpy_chk_avx_unaligned_rtm) ++ IFUNC_IMPL_ADD (array, i, __mempcpy_chk, ++ (CPU_FEATURE_USABLE (AVX) ++ && CPU_FEATURE_USABLE (RTM)), ++ __mempcpy_chk_avx_unaligned_erms_rtm) ++ IFUNC_IMPL_ADD (array, i, __mempcpy_chk, ++ CPU_FEATURE_USABLE (AVX512VL), ++ __mempcpy_chk_evex_unaligned) ++ IFUNC_IMPL_ADD (array, i, __mempcpy_chk, ++ CPU_FEATURE_USABLE (AVX512VL), ++ __mempcpy_chk_evex_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + CPU_FEATURE_USABLE (SSSE3), + __mempcpy_chk_ssse3_back) +@@ -542,10 +877,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + CPU_FEATURE_USABLE (AVX512F), + __mempcpy_avx512_no_vzeroupper) + IFUNC_IMPL_ADD (array, i, mempcpy, +- CPU_FEATURE_USABLE (AVX512F), ++ CPU_FEATURE_USABLE (AVX512VL), + __mempcpy_avx512_unaligned) + IFUNC_IMPL_ADD (array, i, mempcpy, +- CPU_FEATURE_USABLE (AVX512F), ++ CPU_FEATURE_USABLE (AVX512VL), + __mempcpy_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, mempcpy, + CPU_FEATURE_USABLE (AVX), +@@ -553,6 +888,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, mempcpy, + CPU_FEATURE_USABLE (AVX), + __mempcpy_avx_unaligned_erms) ++ IFUNC_IMPL_ADD (array, i, mempcpy, ++ (CPU_FEATURE_USABLE (AVX) ++ && 
CPU_FEATURE_USABLE (RTM)), ++ __mempcpy_avx_unaligned_rtm) ++ IFUNC_IMPL_ADD (array, i, mempcpy, ++ (CPU_FEATURE_USABLE (AVX) ++ && CPU_FEATURE_USABLE (RTM)), ++ __mempcpy_avx_unaligned_erms_rtm) ++ IFUNC_IMPL_ADD (array, i, mempcpy, ++ CPU_FEATURE_USABLE (AVX512VL), ++ __mempcpy_evex_unaligned) ++ IFUNC_IMPL_ADD (array, i, mempcpy, ++ CPU_FEATURE_USABLE (AVX512VL), ++ __mempcpy_evex_unaligned_erms) + IFUNC_IMPL_ADD (array, i, mempcpy, CPU_FEATURE_USABLE (SSSE3), + __mempcpy_ssse3_back) + IFUNC_IMPL_ADD (array, i, mempcpy, CPU_FEATURE_USABLE (SSSE3), +@@ -568,6 +917,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strncmp, + CPU_FEATURE_USABLE (AVX2), + __strncmp_avx2) ++ IFUNC_IMPL_ADD (array, i, strncmp, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __strncmp_avx2_rtm) ++ IFUNC_IMPL_ADD (array, i, strncmp, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), ++ __strncmp_evex) + IFUNC_IMPL_ADD (array, i, strncmp, CPU_FEATURE_USABLE (SSE4_2), + __strncmp_sse42) + IFUNC_IMPL_ADD (array, i, strncmp, CPU_FEATURE_USABLE (SSSE3), +@@ -582,6 +939,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, __wmemset_chk, + CPU_FEATURE_USABLE (AVX2), + __wmemset_chk_avx2_unaligned) ++ IFUNC_IMPL_ADD (array, i, __wmemset_chk, ++ CPU_FEATURE_USABLE (AVX512VL), ++ __wmemset_chk_evex_unaligned) + IFUNC_IMPL_ADD (array, i, __wmemset_chk, + CPU_FEATURE_USABLE (AVX512F), + __wmemset_chk_avx512_unaligned)) +diff --git a/sysdeps/x86_64/multiarch/ifunc-memcmp.h b/sysdeps/x86_64/multiarch/ifunc-memcmp.h +index 0e21b3a628..4f96c2764a 100644 +--- a/sysdeps/x86_64/multiarch/ifunc-memcmp.h ++++ b/sysdeps/x86_64/multiarch/ifunc-memcmp.h +@@ -23,17 +23,28 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) 
attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe_rtm) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_movbe) attribute_hidden; + + static inline void * + IFUNC_SELECTOR (void) + { + const struct cpu_features* cpu_features = __get_cpu_features (); + +- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) +- && CPU_FEATURE_USABLE_P (cpu_features, AVX2) ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) + && CPU_FEATURE_USABLE_P (cpu_features, MOVBE) + && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) +- return OPTIMIZE (avx2_movbe); ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) ++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) ++ return OPTIMIZE (evex_movbe); ++ ++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) ++ return OPTIMIZE (avx2_movbe_rtm); ++ ++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) ++ return OPTIMIZE (avx2_movbe); ++ } + + if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1)) + return OPTIMIZE (sse4_1); +diff --git a/sysdeps/x86_64/multiarch/ifunc-memmove.h b/sysdeps/x86_64/multiarch/ifunc-memmove.h +index 9ada03aa43..db26210e3b 100644 +--- a/sysdeps/x86_64/multiarch/ifunc-memmove.h ++++ b/sysdeps/x86_64/multiarch/ifunc-memmove.h +@@ -29,6 +29,14 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3_back) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms) + attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_rtm) ++ attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms_rtm) ++ attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) ++ attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms) ++ attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) + 
attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms) +@@ -48,21 +56,42 @@ IFUNC_SELECTOR (void) + if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F) + && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) + { +- if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) +- return OPTIMIZE (avx512_no_vzeroupper); ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)) ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) ++ return OPTIMIZE (avx512_unaligned_erms); + +- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) +- return OPTIMIZE (avx512_unaligned_erms); ++ return OPTIMIZE (avx512_unaligned); ++ } + +- return OPTIMIZE (avx512_unaligned); ++ return OPTIMIZE (avx512_no_vzeroupper); + } + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + { +- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) +- return OPTIMIZE (avx_unaligned_erms); ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)) ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) ++ return OPTIMIZE (evex_unaligned_erms); ++ ++ return OPTIMIZE (evex_unaligned); ++ } ++ ++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) ++ return OPTIMIZE (avx_unaligned_erms_rtm); ++ ++ return OPTIMIZE (avx_unaligned_rtm); ++ } ++ ++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) ++ return OPTIMIZE (avx_unaligned_erms); + +- return OPTIMIZE (avx_unaligned); ++ return OPTIMIZE (avx_unaligned); ++ } + } + + if (!CPU_FEATURE_USABLE_P (cpu_features, SSSE3) +diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h +index f52613d372..57029fc17b 100644 +--- a/sysdeps/x86_64/multiarch/ifunc-memset.h ++++ b/sysdeps/x86_64/multiarch/ifunc-memset.h +@@ -27,6 +27,14 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms) + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden; + extern __typeof 
(REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms) + attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm) ++ attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms_rtm) ++ attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) ++ attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms) ++ attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) + attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms) +@@ -45,21 +53,44 @@ IFUNC_SELECTOR (void) + if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F) + && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) + { +- if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) +- return OPTIMIZE (avx512_no_vzeroupper); ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) ++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) ++ return OPTIMIZE (avx512_unaligned_erms); + +- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) +- return OPTIMIZE (avx512_unaligned_erms); ++ return OPTIMIZE (avx512_unaligned); ++ } + +- return OPTIMIZE (avx512_unaligned); ++ return OPTIMIZE (avx512_no_vzeroupper); + } + + if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)) + { +- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) +- return OPTIMIZE (avx2_unaligned_erms); +- else +- return OPTIMIZE (avx2_unaligned); ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) ++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) ++ return OPTIMIZE (evex_unaligned_erms); ++ ++ return OPTIMIZE (evex_unaligned); ++ } ++ ++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) ++ return OPTIMIZE (avx2_unaligned_erms_rtm); ++ ++ return OPTIMIZE (avx2_unaligned_rtm); ++ } ++ ++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) ++ { ++ if 
(CPU_FEATURE_USABLE_P (cpu_features, ERMS)) ++ return OPTIMIZE (avx2_unaligned_erms); ++ ++ return OPTIMIZE (avx2_unaligned); ++ } + } + + if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) +diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h +index 63b0dc0d96..35741f3ec8 100644 +--- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h ++++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h +@@ -25,16 +25,27 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) + attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + + static inline void * + IFUNC_SELECTOR (void) + { + const struct cpu_features* cpu_features = __get_cpu_features (); + +- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) +- && CPU_FEATURE_USABLE_P (cpu_features, AVX2) ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) + && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) +- return OPTIMIZE (avx2); ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) ++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) ++ return OPTIMIZE (evex); ++ ++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) ++ return OPTIMIZE (avx2_rtm); ++ ++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) ++ return OPTIMIZE (avx2); ++ } + + if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load)) + return OPTIMIZE (sse2_unaligned); +diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h +new file mode 100644 +index 0000000000..39e3347378 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h +@@ -0,0 +1,52 @@ ++/* Common definition for ifunc selections for wcslen and wcsnlen ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2017-2021 Free Software Foundation, Inc. 
++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <init-arch.h> ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++ const struct cpu_features* cpu_features = __get_cpu_features (); ++ ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) ++ && CPU_FEATURE_USABLE_P (cpu_features, BMI2) ++ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) ++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) ++ return OPTIMIZE (evex); ++ ++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) ++ return OPTIMIZE (avx2_rtm); ++ ++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) ++ return OPTIMIZE (avx2); ++ } ++ ++ if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1)) ++ return OPTIMIZE (sse4_1); ++ ++ return OPTIMIZE (sse2); ++} +diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h +index 8cfce562fc..e06e8b4d80 100644 +--- 
a/sysdeps/x86_64/multiarch/ifunc-wmemset.h ++++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h +@@ -20,6 +20,9 @@ + + extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm) ++ attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden; + + static inline void * +@@ -27,14 +30,21 @@ IFUNC_SELECTOR (void) + { + const struct cpu_features* cpu_features = __get_cpu_features (); + +- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) +- && CPU_FEATURE_USABLE_P (cpu_features, AVX2) ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) + && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + { +- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F) +- && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) +- return OPTIMIZE (avx512_unaligned); +- else ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)) ++ { ++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) ++ return OPTIMIZE (avx512_unaligned); ++ ++ return OPTIMIZE (evex_unaligned); ++ } ++ ++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) ++ return OPTIMIZE (avx2_unaligned_rtm); ++ ++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE (avx2_unaligned); + } + +diff --git a/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S +new file mode 100644 +index 0000000000..87b076c7c4 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S +@@ -0,0 +1,12 @@ ++#ifndef MEMCHR ++# define MEMCHR __memchr_avx2_rtm ++#endif ++ ++#define ZERO_UPPER_VEC_REGISTERS_RETURN \ ++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST ++ ++#define VZEROUPPER_RETURN jmp L(return_vzeroupper) ++ ++#define SECTION(p) p##.avx.rtm ++ ++#include "memchr-avx2.S" +diff --git 
a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S +index e5a9abd211..0987616a1b 100644 +--- a/sysdeps/x86_64/multiarch/memchr-avx2.S ++++ b/sysdeps/x86_64/multiarch/memchr-avx2.S +@@ -26,319 +26,407 @@ + + # ifdef USE_AS_WMEMCHR + # define VPCMPEQ vpcmpeqd ++# define VPBROADCAST vpbroadcastd ++# define CHAR_SIZE 4 + # else + # define VPCMPEQ vpcmpeqb ++# define VPBROADCAST vpbroadcastb ++# define CHAR_SIZE 1 ++# endif ++ ++# ifdef USE_AS_RAWMEMCHR ++# define ERAW_PTR_REG ecx ++# define RRAW_PTR_REG rcx ++# define ALGN_PTR_REG rdi ++# else ++# define ERAW_PTR_REG edi ++# define RRAW_PTR_REG rdi ++# define ALGN_PTR_REG rcx + # endif + + # ifndef VZEROUPPER + # define VZEROUPPER vzeroupper + # endif + ++# ifndef SECTION ++# define SECTION(p) p##.avx ++# endif ++ + # define VEC_SIZE 32 ++# define PAGE_SIZE 4096 ++# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) + +- .section .text.avx,"ax",@progbits ++ .section SECTION(.text),"ax",@progbits + ENTRY (MEMCHR) + # ifndef USE_AS_RAWMEMCHR + /* Check for zero length. */ ++# ifdef __ILP32__ ++ /* Clear upper bits. */ ++ and %RDX_LP, %RDX_LP ++# else + test %RDX_LP, %RDX_LP ++# endif + jz L(null) + # endif +- movl %edi, %ecx +- /* Broadcast CHAR to YMM0. */ ++ /* Broadcast CHAR to YMMMATCH. */ + vmovd %esi, %xmm0 +-# ifdef USE_AS_WMEMCHR +- shl $2, %RDX_LP +- vpbroadcastd %xmm0, %ymm0 +-# else +-# ifdef __ILP32__ +- /* Clear the upper 32 bits. */ +- movl %edx, %edx +-# endif +- vpbroadcastb %xmm0, %ymm0 +-# endif ++ VPBROADCAST %xmm0, %ymm0 + /* Check if we may cross page boundary with one vector load. */ +- andl $(2 * VEC_SIZE - 1), %ecx +- cmpl $VEC_SIZE, %ecx +- ja L(cros_page_boundary) ++ movl %edi, %eax ++ andl $(PAGE_SIZE - 1), %eax ++ cmpl $(PAGE_SIZE - VEC_SIZE), %eax ++ ja L(cross_page_boundary) + + /* Check the first VEC_SIZE bytes. 
*/ +- VPCMPEQ (%rdi), %ymm0, %ymm1 ++ VPCMPEQ (%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax +- testl %eax, %eax +- + # ifndef USE_AS_RAWMEMCHR +- jnz L(first_vec_x0_check) +- /* Adjust length and check the end of data. */ +- subq $VEC_SIZE, %rdx +- jbe L(zero) +-# else +- jnz L(first_vec_x0) ++ /* If length < CHAR_PER_VEC handle special. */ ++ cmpq $CHAR_PER_VEC, %rdx ++ jbe L(first_vec_x0) + # endif +- +- /* Align data for aligned loads in the loop. */ +- addq $VEC_SIZE, %rdi +- andl $(VEC_SIZE - 1), %ecx +- andq $-VEC_SIZE, %rdi ++ testl %eax, %eax ++ jz L(aligned_more) ++ tzcntl %eax, %eax ++ addq %rdi, %rax ++ VZEROUPPER_RETURN + + # ifndef USE_AS_RAWMEMCHR +- /* Adjust length. */ +- addq %rcx, %rdx ++ .p2align 5 ++L(first_vec_x0): ++ /* Check if first match was before length. */ ++ tzcntl %eax, %eax ++# ifdef USE_AS_WMEMCHR ++ /* NB: Multiply length by 4 to get byte count. */ ++ sall $2, %edx ++# endif ++ xorl %ecx, %ecx ++ cmpl %eax, %edx ++ leaq (%rdi, %rax), %rax ++ cmovle %rcx, %rax ++ VZEROUPPER_RETURN + +- subq $(VEC_SIZE * 4), %rdx +- jbe L(last_4x_vec_or_less) ++L(null): ++ xorl %eax, %eax ++ ret + # endif +- jmp L(more_4x_vec) +- + .p2align 4 +-L(cros_page_boundary): +- andl $(VEC_SIZE - 1), %ecx +- andq $-VEC_SIZE, %rdi +- VPCMPEQ (%rdi), %ymm0, %ymm1 ++L(cross_page_boundary): ++ /* Save pointer before aligning as its original value is ++ necessary for computer return address if byte is found or ++ adjusting length if it is not and this is memchr. */ ++ movq %rdi, %rcx ++ /* Align data to VEC_SIZE - 1. ALGN_PTR_REG is rcx for memchr ++ and rdi for rawmemchr. */ ++ orq $(VEC_SIZE - 1), %ALGN_PTR_REG ++ VPCMPEQ -(VEC_SIZE - 1)(%ALGN_PTR_REG), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax ++# ifndef USE_AS_RAWMEMCHR ++ /* Calculate length until end of page (length checked for a ++ match). */ ++ leaq 1(%ALGN_PTR_REG), %rsi ++ subq %RRAW_PTR_REG, %rsi ++# ifdef USE_AS_WMEMCHR ++ /* NB: Divide bytes by 4 to get wchar_t count. 
*/ ++ shrl $2, %esi ++# endif ++# endif + /* Remove the leading bytes. */ +- sarl %cl, %eax +- testl %eax, %eax +- jz L(aligned_more) +- tzcntl %eax, %eax ++ sarxl %ERAW_PTR_REG, %eax, %eax + # ifndef USE_AS_RAWMEMCHR + /* Check the end of data. */ +- cmpq %rax, %rdx +- jbe L(zero) ++ cmpq %rsi, %rdx ++ jbe L(first_vec_x0) + # endif ++ testl %eax, %eax ++ jz L(cross_page_continue) ++ tzcntl %eax, %eax ++ addq %RRAW_PTR_REG, %rax ++L(return_vzeroupper): ++ ZERO_UPPER_VEC_REGISTERS_RETURN ++ ++ .p2align 4 ++L(first_vec_x1): ++ tzcntl %eax, %eax ++ incq %rdi + addq %rdi, %rax +- addq %rcx, %rax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 +-L(aligned_more): +-# ifndef USE_AS_RAWMEMCHR +- /* Calculate "rdx + rcx - VEC_SIZE" with "rdx - (VEC_SIZE - rcx)" +- instead of "(rdx + rcx) - VEC_SIZE" to void possible addition +- overflow. */ +- negq %rcx +- addq $VEC_SIZE, %rcx ++L(first_vec_x2): ++ tzcntl %eax, %eax ++ addq $(VEC_SIZE + 1), %rdi ++ addq %rdi, %rax ++ VZEROUPPER_RETURN + +- /* Check the end of data. */ +- subq %rcx, %rdx +- jbe L(zero) +-# endif ++ .p2align 4 ++L(first_vec_x3): ++ tzcntl %eax, %eax ++ addq $(VEC_SIZE * 2 + 1), %rdi ++ addq %rdi, %rax ++ VZEROUPPER_RETURN + +- addq $VEC_SIZE, %rdi + +-# ifndef USE_AS_RAWMEMCHR +- subq $(VEC_SIZE * 4), %rdx +- jbe L(last_4x_vec_or_less) +-# endif ++ .p2align 4 ++L(first_vec_x4): ++ tzcntl %eax, %eax ++ addq $(VEC_SIZE * 3 + 1), %rdi ++ addq %rdi, %rax ++ VZEROUPPER_RETURN + +-L(more_4x_vec): ++ .p2align 4 ++L(aligned_more): + /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time + since data is only aligned to VEC_SIZE. */ +- VPCMPEQ (%rdi), %ymm0, %ymm1 +- vpmovmskb %ymm1, %eax +- testl %eax, %eax +- jnz L(first_vec_x0) + +- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 ++# ifndef USE_AS_RAWMEMCHR ++L(cross_page_continue): ++ /* Align data to VEC_SIZE - 1. */ ++ xorl %ecx, %ecx ++ subl %edi, %ecx ++ orq $(VEC_SIZE - 1), %rdi ++ /* esi is for adjusting length to see if near the end. 
*/ ++ leal (VEC_SIZE * 4 + 1)(%rdi, %rcx), %esi ++# ifdef USE_AS_WMEMCHR ++ /* NB: Divide bytes by 4 to get the wchar_t count. */ ++ sarl $2, %esi ++# endif ++# else ++ orq $(VEC_SIZE - 1), %rdi ++L(cross_page_continue): ++# endif ++ /* Load first VEC regardless. */ ++ VPCMPEQ 1(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax ++# ifndef USE_AS_RAWMEMCHR ++ /* Adjust length. If near end handle specially. */ ++ subq %rsi, %rdx ++ jbe L(last_4x_vec_or_less) ++# endif + testl %eax, %eax + jnz L(first_vec_x1) + +- VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1 ++ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + testl %eax, %eax + jnz L(first_vec_x2) + +- VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1 ++ VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + testl %eax, %eax + jnz L(first_vec_x3) + +- addq $(VEC_SIZE * 4), %rdi +- +-# ifndef USE_AS_RAWMEMCHR +- subq $(VEC_SIZE * 4), %rdx +- jbe L(last_4x_vec_or_less) +-# endif +- +- /* Align data to 4 * VEC_SIZE. */ +- movq %rdi, %rcx +- andl $(4 * VEC_SIZE - 1), %ecx +- andq $-(4 * VEC_SIZE), %rdi ++ VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1 ++ vpmovmskb %ymm1, %eax ++ testl %eax, %eax ++ jnz L(first_vec_x4) + + # ifndef USE_AS_RAWMEMCHR +- /* Adjust length. */ ++ /* Check if at last VEC_SIZE * 4 length. */ ++ subq $(CHAR_PER_VEC * 4), %rdx ++ jbe L(last_4x_vec_or_less_cmpeq) ++ /* Align data to VEC_SIZE * 4 - 1 for the loop and readjust ++ length. */ ++ incq %rdi ++ movl %edi, %ecx ++ orq $(VEC_SIZE * 4 - 1), %rdi ++ andl $(VEC_SIZE * 4 - 1), %ecx ++# ifdef USE_AS_WMEMCHR ++ /* NB: Divide bytes by 4 to get the wchar_t count. */ ++ sarl $2, %ecx ++# endif + addq %rcx, %rdx ++# else ++ /* Align data to VEC_SIZE * 4 - 1 for loop. */ ++ incq %rdi ++ orq $(VEC_SIZE * 4 - 1), %rdi + # endif + ++ /* Compare 4 * VEC at a time forward. */ + .p2align 4 + L(loop_4x_vec): +- /* Compare 4 * VEC at a time forward. 
*/ +- VPCMPEQ (%rdi), %ymm0, %ymm1 +- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm2 +- VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm3 +- VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm4 +- ++ VPCMPEQ 1(%rdi), %ymm0, %ymm1 ++ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm2 ++ VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm3 ++ VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm4 + vpor %ymm1, %ymm2, %ymm5 + vpor %ymm3, %ymm4, %ymm6 + vpor %ymm5, %ymm6, %ymm5 + +- vpmovmskb %ymm5, %eax +- testl %eax, %eax +- jnz L(4x_vec_end) +- +- addq $(VEC_SIZE * 4), %rdi +- ++ vpmovmskb %ymm5, %ecx + # ifdef USE_AS_RAWMEMCHR +- jmp L(loop_4x_vec) ++ subq $-(VEC_SIZE * 4), %rdi ++ testl %ecx, %ecx ++ jz L(loop_4x_vec) + # else +- subq $(VEC_SIZE * 4), %rdx +- ja L(loop_4x_vec) ++ testl %ecx, %ecx ++ jnz L(loop_4x_vec_end) + +-L(last_4x_vec_or_less): +- /* Less than 4 * VEC and aligned to VEC_SIZE. */ +- addl $(VEC_SIZE * 2), %edx +- jle L(last_2x_vec) ++ subq $-(VEC_SIZE * 4), %rdi + +- VPCMPEQ (%rdi), %ymm0, %ymm1 +- vpmovmskb %ymm1, %eax +- testl %eax, %eax +- jnz L(first_vec_x0) ++ subq $(CHAR_PER_VEC * 4), %rdx ++ ja L(loop_4x_vec) + +- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 ++ /* Fall through into less than 4 remaining vectors of length ++ case. */ ++ VPCMPEQ (VEC_SIZE * 0 + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax ++ .p2align 4 ++L(last_4x_vec_or_less): ++# ifdef USE_AS_WMEMCHR ++ /* NB: Multiply length by 4 to get byte count. */ ++ sall $2, %edx ++# endif ++ /* Check if first VEC contained match. */ + testl %eax, %eax +- jnz L(first_vec_x1) ++ jnz L(first_vec_x1_check) + +- VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1 +- vpmovmskb %ymm1, %eax +- testl %eax, %eax ++ /* If remaining length > VEC_SIZE * 2. */ ++ addl $(VEC_SIZE * 2), %edx ++ jg L(last_4x_vec) + +- jnz L(first_vec_x2_check) +- subl $VEC_SIZE, %edx +- jle L(zero) ++L(last_2x_vec): ++ /* If remaining length < VEC_SIZE. 
*/ ++ addl $VEC_SIZE, %edx ++ jle L(zero_end) + +- VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1 ++ /* Check VEC2 and compare any match with remaining length. */ ++ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax +- testl %eax, %eax +- +- jnz L(first_vec_x3_check) +- xorl %eax, %eax +- VZEROUPPER +- ret ++ tzcntl %eax, %eax ++ cmpl %eax, %edx ++ jbe L(set_zero_end) ++ addq $(VEC_SIZE + 1), %rdi ++ addq %rdi, %rax ++L(zero_end): ++ VZEROUPPER_RETURN + + .p2align 4 +-L(last_2x_vec): +- addl $(VEC_SIZE * 2), %edx +- VPCMPEQ (%rdi), %ymm0, %ymm1 ++L(loop_4x_vec_end): ++# endif ++ /* rawmemchr will fall through into this if match was found in ++ loop. */ ++ + vpmovmskb %ymm1, %eax + testl %eax, %eax ++ jnz L(last_vec_x1_return) + +- jnz L(first_vec_x0_check) +- subl $VEC_SIZE, %edx +- jle L(zero) +- +- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 +- vpmovmskb %ymm1, %eax ++ vpmovmskb %ymm2, %eax + testl %eax, %eax +- jnz L(first_vec_x1_check) +- xorl %eax, %eax +- VZEROUPPER +- ret ++ jnz L(last_vec_x2_return) + +- .p2align 4 +-L(first_vec_x0_check): +- tzcntl %eax, %eax +- /* Check the end of data. */ +- cmpq %rax, %rdx +- jbe L(zero) ++ vpmovmskb %ymm3, %eax ++ /* Combine VEC3 matches (eax) with VEC4 matches (ecx). */ ++ salq $32, %rcx ++ orq %rcx, %rax ++ tzcntq %rax, %rax ++# ifdef USE_AS_RAWMEMCHR ++ subq $(VEC_SIZE * 2 - 1), %rdi ++# else ++ subq $-(VEC_SIZE * 2 + 1), %rdi ++# endif + addq %rdi, %rax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN ++# ifndef USE_AS_RAWMEMCHR + + .p2align 4 + L(first_vec_x1_check): + tzcntl %eax, %eax +- /* Check the end of data. */ +- cmpq %rax, %rdx +- jbe L(zero) +- addq $VEC_SIZE, %rax ++ /* Adjust length. */ ++ subl $-(VEC_SIZE * 4), %edx ++ /* Check if match within remaining length. 
*/ ++ cmpl %eax, %edx ++ jbe L(set_zero_end) ++ incq %rdi + addq %rdi, %rax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN ++ .p2align 4 ++L(set_zero_end): ++ xorl %eax, %eax ++ VZEROUPPER_RETURN ++# endif + + .p2align 4 +-L(first_vec_x2_check): ++L(last_vec_x1_return): + tzcntl %eax, %eax +- /* Check the end of data. */ +- cmpq %rax, %rdx +- jbe L(zero) +- addq $(VEC_SIZE * 2), %rax ++# ifdef USE_AS_RAWMEMCHR ++ subq $(VEC_SIZE * 4 - 1), %rdi ++# else ++ incq %rdi ++# endif + addq %rdi, %rax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 +-L(first_vec_x3_check): ++L(last_vec_x2_return): + tzcntl %eax, %eax +- /* Check the end of data. */ +- cmpq %rax, %rdx +- jbe L(zero) +- addq $(VEC_SIZE * 3), %rax ++# ifdef USE_AS_RAWMEMCHR ++ subq $(VEC_SIZE * 3 - 1), %rdi ++# else ++ subq $-(VEC_SIZE + 1), %rdi ++# endif + addq %rdi, %rax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + ++# ifndef USE_AS_RAWMEMCHR + .p2align 4 +-L(zero): +- VZEROUPPER +-L(null): +- xorl %eax, %eax +- ret +-# endif ++L(last_4x_vec_or_less_cmpeq): ++ VPCMPEQ (VEC_SIZE * 4 + 1)(%rdi), %ymm0, %ymm1 ++ vpmovmskb %ymm1, %eax ++# ifdef USE_AS_WMEMCHR ++ /* NB: Multiply length by 4 to get byte count. */ ++ sall $2, %edx ++# endif ++ subq $-(VEC_SIZE * 4), %rdi ++ /* Check first VEC regardless. */ ++ testl %eax, %eax ++ jnz L(first_vec_x1_check) + ++ /* If remaining length <= CHAR_PER_VEC * 2. */ ++ addl $(VEC_SIZE * 2), %edx ++ jle L(last_2x_vec) + .p2align 4 +-L(first_vec_x0): +- tzcntl %eax, %eax +- addq %rdi, %rax +- VZEROUPPER +- ret ++L(last_4x_vec): ++ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 ++ vpmovmskb %ymm1, %eax ++ testl %eax, %eax ++ jnz L(last_vec_x2_return) + +- .p2align 4 +-L(first_vec_x1): +- tzcntl %eax, %eax +- addq $VEC_SIZE, %rax +- addq %rdi, %rax +- VZEROUPPER +- ret ++ VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1 ++ vpmovmskb %ymm1, %eax + +- .p2align 4 +-L(first_vec_x2): ++ /* Create mask for possible matches within remaining length. 
*/ ++ movq $-1, %rcx ++ bzhiq %rdx, %rcx, %rcx ++ ++ /* Test matches in data against length match. */ ++ andl %ecx, %eax ++ jnz L(last_vec_x3) ++ ++ /* if remaining length <= VEC_SIZE * 3 (Note this is after ++ remaining length was found to be > VEC_SIZE * 2. */ ++ subl $VEC_SIZE, %edx ++ jbe L(zero_end2) ++ ++ VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1 ++ vpmovmskb %ymm1, %eax ++ /* Shift remaining length mask for last VEC. */ ++ shrq $32, %rcx ++ andl %ecx, %eax ++ jz L(zero_end2) + tzcntl %eax, %eax +- addq $(VEC_SIZE * 2), %rax ++ addq $(VEC_SIZE * 3 + 1), %rdi + addq %rdi, %rax +- VZEROUPPER +- ret ++L(zero_end2): ++ VZEROUPPER_RETURN + + .p2align 4 +-L(4x_vec_end): +- vpmovmskb %ymm1, %eax +- testl %eax, %eax +- jnz L(first_vec_x0) +- vpmovmskb %ymm2, %eax +- testl %eax, %eax +- jnz L(first_vec_x1) +- vpmovmskb %ymm3, %eax +- testl %eax, %eax +- jnz L(first_vec_x2) +- vpmovmskb %ymm4, %eax +- testl %eax, %eax +-L(first_vec_x3): ++L(last_vec_x3): + tzcntl %eax, %eax +- addq $(VEC_SIZE * 3), %rax ++ subq $-(VEC_SIZE * 2 + 1), %rdi + addq %rdi, %rax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN ++# endif + + END (MEMCHR) + #endif +diff --git a/sysdeps/x86_64/multiarch/memchr-evex.S b/sysdeps/x86_64/multiarch/memchr-evex.S +new file mode 100644 +index 0000000000..f3fdad4fda +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/memchr-evex.S +@@ -0,0 +1,478 @@ ++/* memchr/wmemchr optimized with 256-bit EVEX instructions. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#if IS_IN (libc) ++ ++# include <sysdep.h> ++ ++# ifndef MEMCHR ++# define MEMCHR __memchr_evex ++# endif ++ ++# ifdef USE_AS_WMEMCHR ++# define VPBROADCAST vpbroadcastd ++# define VPMINU vpminud ++# define VPCMP vpcmpd ++# define VPCMPEQ vpcmpeqd ++# define CHAR_SIZE 4 ++# else ++# define VPBROADCAST vpbroadcastb ++# define VPMINU vpminub ++# define VPCMP vpcmpb ++# define VPCMPEQ vpcmpeqb ++# define CHAR_SIZE 1 ++# endif ++ ++# ifdef USE_AS_RAWMEMCHR ++# define RAW_PTR_REG rcx ++# define ALGN_PTR_REG rdi ++# else ++# define RAW_PTR_REG rdi ++# define ALGN_PTR_REG rcx ++# endif ++ ++# define XMMZERO xmm23 ++# define YMMZERO ymm23 ++# define XMMMATCH xmm16 ++# define YMMMATCH ymm16 ++# define YMM1 ymm17 ++# define YMM2 ymm18 ++# define YMM3 ymm19 ++# define YMM4 ymm20 ++# define YMM5 ymm21 ++# define YMM6 ymm22 ++ ++# define VEC_SIZE 32 ++# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) ++# define PAGE_SIZE 4096 ++ ++ .section .text.evex,"ax",@progbits ++ENTRY (MEMCHR) ++# ifndef USE_AS_RAWMEMCHR ++ /* Check for zero length. */ ++ test %RDX_LP, %RDX_LP ++ jz L(zero) ++ ++# ifdef __ILP32__ ++ /* Clear the upper 32 bits. */ ++ movl %edx, %edx ++# endif ++# endif ++ /* Broadcast CHAR to YMMMATCH. */ ++ VPBROADCAST %esi, %YMMMATCH ++ /* Check if we may cross page boundary with one vector load. */ ++ movl %edi, %eax ++ andl $(PAGE_SIZE - 1), %eax ++ cmpl $(PAGE_SIZE - VEC_SIZE), %eax ++ ja L(cross_page_boundary) ++ ++ /* Check the first VEC_SIZE bytes. 
*/ ++ VPCMP $0, (%rdi), %YMMMATCH, %k0 ++ kmovd %k0, %eax ++# ifndef USE_AS_RAWMEMCHR ++ /* If length < CHAR_PER_VEC handle special. */ ++ cmpq $CHAR_PER_VEC, %rdx ++ jbe L(first_vec_x0) ++# endif ++ testl %eax, %eax ++ jz L(aligned_more) ++ tzcntl %eax, %eax ++# ifdef USE_AS_WMEMCHR ++ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ ++ leaq (%rdi, %rax, CHAR_SIZE), %rax ++# else ++ addq %rdi, %rax ++# endif ++ ret ++ ++# ifndef USE_AS_RAWMEMCHR ++L(zero): ++ xorl %eax, %eax ++ ret ++ ++ .p2align 5 ++L(first_vec_x0): ++ /* Check if first match was before length. */ ++ tzcntl %eax, %eax ++ xorl %ecx, %ecx ++ cmpl %eax, %edx ++ leaq (%rdi, %rax, CHAR_SIZE), %rax ++ cmovle %rcx, %rax ++ ret ++# else ++ /* NB: first_vec_x0 is 17 bytes which will leave ++ cross_page_boundary (which is relatively cold) close enough ++ to ideal alignment. So only realign L(cross_page_boundary) if ++ rawmemchr. */ ++ .p2align 4 ++# endif ++L(cross_page_boundary): ++ /* Save pointer before aligning as its original value is ++ necessary for computer return address if byte is found or ++ adjusting length if it is not and this is memchr. */ ++ movq %rdi, %rcx ++ /* Align data to VEC_SIZE. ALGN_PTR_REG is rcx for memchr and rdi ++ for rawmemchr. */ ++ andq $-VEC_SIZE, %ALGN_PTR_REG ++ VPCMP $0, (%ALGN_PTR_REG), %YMMMATCH, %k0 ++ kmovd %k0, %r8d ++# ifdef USE_AS_WMEMCHR ++ /* NB: Divide shift count by 4 since each bit in K0 represent 4 ++ bytes. */ ++ sarl $2, %eax ++# endif ++# ifndef USE_AS_RAWMEMCHR ++ movl $(PAGE_SIZE / CHAR_SIZE), %esi ++ subl %eax, %esi ++# endif ++# ifdef USE_AS_WMEMCHR ++ andl $(CHAR_PER_VEC - 1), %eax ++# endif ++ /* Remove the leading bytes. */ ++ sarxl %eax, %r8d, %eax ++# ifndef USE_AS_RAWMEMCHR ++ /* Check the end of data. */ ++ cmpq %rsi, %rdx ++ jbe L(first_vec_x0) ++# endif ++ testl %eax, %eax ++ jz L(cross_page_continue) ++ tzcntl %eax, %eax ++# ifdef USE_AS_WMEMCHR ++ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. 
*/ ++ leaq (%RAW_PTR_REG, %rax, CHAR_SIZE), %rax ++# else ++ addq %RAW_PTR_REG, %rax ++# endif ++ ret ++ ++ .p2align 4 ++L(first_vec_x1): ++ tzcntl %eax, %eax ++ leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax ++ ret ++ ++ .p2align 4 ++L(first_vec_x2): ++ tzcntl %eax, %eax ++ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax ++ ret ++ ++ .p2align 4 ++L(first_vec_x3): ++ tzcntl %eax, %eax ++ leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax ++ ret ++ ++ .p2align 4 ++L(first_vec_x4): ++ tzcntl %eax, %eax ++ leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax ++ ret ++ ++ .p2align 5 ++L(aligned_more): ++ /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time ++ since data is only aligned to VEC_SIZE. */ ++ ++# ifndef USE_AS_RAWMEMCHR ++ /* Align data to VEC_SIZE. */ ++L(cross_page_continue): ++ xorl %ecx, %ecx ++ subl %edi, %ecx ++ andq $-VEC_SIZE, %rdi ++ /* esi is for adjusting length to see if near the end. */ ++ leal (VEC_SIZE * 5)(%rdi, %rcx), %esi ++# ifdef USE_AS_WMEMCHR ++ /* NB: Divide bytes by 4 to get the wchar_t count. */ ++ sarl $2, %esi ++# endif ++# else ++ andq $-VEC_SIZE, %rdi ++L(cross_page_continue): ++# endif ++ /* Load first VEC regardless. */ ++ VPCMP $0, (VEC_SIZE)(%rdi), %YMMMATCH, %k0 ++ kmovd %k0, %eax ++# ifndef USE_AS_RAWMEMCHR ++ /* Adjust length. If near end handle specially. */ ++ subq %rsi, %rdx ++ jbe L(last_4x_vec_or_less) ++# endif ++ testl %eax, %eax ++ jnz L(first_vec_x1) ++ ++ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0 ++ kmovd %k0, %eax ++ testl %eax, %eax ++ jnz L(first_vec_x2) ++ ++ VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k0 ++ kmovd %k0, %eax ++ testl %eax, %eax ++ jnz L(first_vec_x3) ++ ++ VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0 ++ kmovd %k0, %eax ++ testl %eax, %eax ++ jnz L(first_vec_x4) ++ ++ ++# ifndef USE_AS_RAWMEMCHR ++ /* Check if at last CHAR_PER_VEC * 4 length. 
*/ ++ subq $(CHAR_PER_VEC * 4), %rdx ++ jbe L(last_4x_vec_or_less_cmpeq) ++ addq $VEC_SIZE, %rdi ++ ++ /* Align data to VEC_SIZE * 4 for the loop and readjust length. ++ */ ++# ifdef USE_AS_WMEMCHR ++ movl %edi, %ecx ++ andq $-(4 * VEC_SIZE), %rdi ++ andl $(VEC_SIZE * 4 - 1), %ecx ++ /* NB: Divide bytes by 4 to get the wchar_t count. */ ++ sarl $2, %ecx ++ addq %rcx, %rdx ++# else ++ addq %rdi, %rdx ++ andq $-(4 * VEC_SIZE), %rdi ++ subq %rdi, %rdx ++# endif ++# else ++ addq $VEC_SIZE, %rdi ++ andq $-(4 * VEC_SIZE), %rdi ++# endif ++ ++ vpxorq %XMMZERO, %XMMZERO, %XMMZERO ++ ++ /* Compare 4 * VEC at a time forward. */ ++ .p2align 4 ++L(loop_4x_vec): ++ /* It would be possible to save some instructions using 4x VPCMP ++ but bottleneck on port 5 makes it not woth it. */ ++ VPCMP $4, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k1 ++ /* xor will set bytes match esi to zero. */ ++ vpxorq (VEC_SIZE * 5)(%rdi), %YMMMATCH, %YMM2 ++ vpxorq (VEC_SIZE * 6)(%rdi), %YMMMATCH, %YMM3 ++ VPCMP $0, (VEC_SIZE * 7)(%rdi), %YMMMATCH, %k3 ++ /* Reduce VEC2 / VEC3 with min and VEC1 with zero mask. */ ++ VPMINU %YMM2, %YMM3, %YMM3{%k1}{z} ++ VPCMP $0, %YMM3, %YMMZERO, %k2 ++# ifdef USE_AS_RAWMEMCHR ++ subq $-(VEC_SIZE * 4), %rdi ++ kortestd %k2, %k3 ++ jz L(loop_4x_vec) ++# else ++ kortestd %k2, %k3 ++ jnz L(loop_4x_vec_end) ++ ++ subq $-(VEC_SIZE * 4), %rdi ++ ++ subq $(CHAR_PER_VEC * 4), %rdx ++ ja L(loop_4x_vec) ++ ++ /* Fall through into less than 4 remaining vectors of length case. ++ */ ++ VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0 ++ kmovd %k0, %eax ++ addq $(VEC_SIZE * 3), %rdi ++ .p2align 4 ++L(last_4x_vec_or_less): ++ /* Check if first VEC contained match. */ ++ testl %eax, %eax ++ jnz L(first_vec_x1_check) ++ ++ /* If remaining length > CHAR_PER_VEC * 2. */ ++ addl $(CHAR_PER_VEC * 2), %edx ++ jg L(last_4x_vec) ++ ++L(last_2x_vec): ++ /* If remaining length < CHAR_PER_VEC. 
*/ ++ addl $CHAR_PER_VEC, %edx ++ jle L(zero_end) ++ ++ /* Check VEC2 and compare any match with remaining length. */ ++ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0 ++ kmovd %k0, %eax ++ tzcntl %eax, %eax ++ cmpl %eax, %edx ++ jbe L(set_zero_end) ++ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax ++L(zero_end): ++ ret ++ ++ ++ .p2align 4 ++L(first_vec_x1_check): ++ tzcntl %eax, %eax ++ /* Adjust length. */ ++ subl $-(CHAR_PER_VEC * 4), %edx ++ /* Check if match within remaining length. */ ++ cmpl %eax, %edx ++ jbe L(set_zero_end) ++ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ ++ leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax ++ ret ++L(set_zero_end): ++ xorl %eax, %eax ++ ret ++ ++ .p2align 4 ++L(loop_4x_vec_end): ++# endif ++ /* rawmemchr will fall through into this if match was found in ++ loop. */ ++ ++ /* k1 has not of matches with VEC1. */ ++ kmovd %k1, %eax ++# ifdef USE_AS_WMEMCHR ++ subl $((1 << CHAR_PER_VEC) - 1), %eax ++# else ++ incl %eax ++# endif ++ jnz L(last_vec_x1_return) ++ ++ VPCMP $0, %YMM2, %YMMZERO, %k0 ++ kmovd %k0, %eax ++ testl %eax, %eax ++ jnz L(last_vec_x2_return) ++ ++ kmovd %k2, %eax ++ testl %eax, %eax ++ jnz L(last_vec_x3_return) ++ ++ kmovd %k3, %eax ++ tzcntl %eax, %eax ++# ifdef USE_AS_RAWMEMCHR ++ leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax ++# else ++ leaq (VEC_SIZE * 7)(%rdi, %rax, CHAR_SIZE), %rax ++# endif ++ ret ++ ++ .p2align 4 ++L(last_vec_x1_return): ++ tzcntl %eax, %eax ++# ifdef USE_AS_RAWMEMCHR ++# ifdef USE_AS_WMEMCHR ++ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ ++ leaq (%rdi, %rax, CHAR_SIZE), %rax ++# else ++ addq %rdi, %rax ++# endif ++# else ++ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ ++ leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax ++# endif ++ ret ++ ++ .p2align 4 ++L(last_vec_x2_return): ++ tzcntl %eax, %eax ++# ifdef USE_AS_RAWMEMCHR ++ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. 
*/ ++ leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax ++# else ++ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ ++ leaq (VEC_SIZE * 5)(%rdi, %rax, CHAR_SIZE), %rax ++# endif ++ ret ++ ++ .p2align 4 ++L(last_vec_x3_return): ++ tzcntl %eax, %eax ++# ifdef USE_AS_RAWMEMCHR ++ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ ++ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax ++# else ++ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ ++ leaq (VEC_SIZE * 6)(%rdi, %rax, CHAR_SIZE), %rax ++# endif ++ ret ++ ++ ++# ifndef USE_AS_RAWMEMCHR ++L(last_4x_vec_or_less_cmpeq): ++ VPCMP $0, (VEC_SIZE * 5)(%rdi), %YMMMATCH, %k0 ++ kmovd %k0, %eax ++ subq $-(VEC_SIZE * 4), %rdi ++ /* Check first VEC regardless. */ ++ testl %eax, %eax ++ jnz L(first_vec_x1_check) ++ ++ /* If remaining length <= CHAR_PER_VEC * 2. */ ++ addl $(CHAR_PER_VEC * 2), %edx ++ jle L(last_2x_vec) ++ ++ .p2align 4 ++L(last_4x_vec): ++ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0 ++ kmovd %k0, %eax ++ testl %eax, %eax ++ jnz L(last_vec_x2) ++ ++ ++ VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k0 ++ kmovd %k0, %eax ++ /* Create mask for possible matches within remaining length. */ ++# ifdef USE_AS_WMEMCHR ++ movl $((1 << (CHAR_PER_VEC * 2)) - 1), %ecx ++ bzhil %edx, %ecx, %ecx ++# else ++ movq $-1, %rcx ++ bzhiq %rdx, %rcx, %rcx ++# endif ++ /* Test matches in data against length match. */ ++ andl %ecx, %eax ++ jnz L(last_vec_x3) ++ ++ /* if remaining length <= CHAR_PER_VEC * 3 (Note this is after ++ remaining length was found to be > CHAR_PER_VEC * 2. */ ++ subl $CHAR_PER_VEC, %edx ++ jbe L(zero_end2) ++ ++ ++ VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0 ++ kmovd %k0, %eax ++ /* Shift remaining length mask for last VEC. 
*/ ++# ifdef USE_AS_WMEMCHR ++ shrl $CHAR_PER_VEC, %ecx ++# else ++ shrq $CHAR_PER_VEC, %rcx ++# endif ++ andl %ecx, %eax ++ jz L(zero_end2) ++ tzcntl %eax, %eax ++ leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax ++L(zero_end2): ++ ret ++ ++L(last_vec_x2): ++ tzcntl %eax, %eax ++ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax ++ ret ++ ++ .p2align 4 ++L(last_vec_x3): ++ tzcntl %eax, %eax ++ leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax ++ ret ++# endif ++ ++END (MEMCHR) ++#endif +diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S +new file mode 100644 +index 0000000000..cf4eff5d4a +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S +@@ -0,0 +1,12 @@ ++#ifndef MEMCMP ++# define MEMCMP __memcmp_avx2_movbe_rtm ++#endif ++ ++#define ZERO_UPPER_VEC_REGISTERS_RETURN \ ++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST ++ ++#define VZEROUPPER_RETURN jmp L(return_vzeroupper) ++ ++#define SECTION(p) p##.avx.rtm ++ ++#include "memcmp-avx2-movbe.S" +diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S +index 67fc575b59..87f9478eaf 100644 +--- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S ++++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S +@@ -47,6 +47,10 @@ + # define VZEROUPPER vzeroupper + # endif + ++# ifndef SECTION ++# define SECTION(p) p##.avx ++# endif ++ + # define VEC_SIZE 32 + # define VEC_MASK ((1 << VEC_SIZE) - 1) + +@@ -55,7 +59,7 @@ + memcmp has to use UNSIGNED comparison for elemnts. 
+ */ + +- .section .text.avx,"ax",@progbits ++ .section SECTION(.text),"ax",@progbits + ENTRY (MEMCMP) + # ifdef USE_AS_WMEMCMP + shl $2, %RDX_LP +@@ -123,8 +127,8 @@ ENTRY (MEMCMP) + vptest %ymm0, %ymm5 + jnc L(4x_vec_end) + xorl %eax, %eax +- VZEROUPPER +- ret ++L(return_vzeroupper): ++ ZERO_UPPER_VEC_REGISTERS_RETURN + + .p2align 4 + L(last_2x_vec): +@@ -144,8 +148,7 @@ L(last_vec): + vpmovmskb %ymm2, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(first_vec): +@@ -164,8 +167,7 @@ L(wmemcmp_return): + movzbl (%rsi, %rcx), %edx + sub %edx, %eax + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + # ifdef USE_AS_WMEMCMP + .p2align 4 +@@ -367,8 +369,7 @@ L(last_4x_vec): + vpmovmskb %ymm2, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(4x_vec_end): +@@ -394,8 +395,7 @@ L(4x_vec_end): + movzbl (VEC_SIZE * 3)(%rsi, %rcx), %edx + sub %edx, %eax + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(first_vec_x1): +@@ -410,8 +410,7 @@ L(first_vec_x1): + movzbl VEC_SIZE(%rsi, %rcx), %edx + sub %edx, %eax + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(first_vec_x2): +@@ -426,7 +425,6 @@ L(first_vec_x2): + movzbl (VEC_SIZE * 2)(%rsi, %rcx), %edx + sub %edx, %eax + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + END (MEMCMP) + #endif +diff --git a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S +new file mode 100644 +index 0000000000..9c093972e1 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S +@@ -0,0 +1,440 @@ ++/* memcmp/wmemcmp optimized with 256-bit EVEX instructions. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#if IS_IN (libc) ++ ++/* memcmp/wmemcmp is implemented as: ++ 1. For size from 2 to 7 bytes, load as big endian with movbe and bswap ++ to avoid branches. ++ 2. Use overlapping compare to avoid branch. ++ 3. Use vector compare when size >= 4 bytes for memcmp or size >= 8 ++ bytes for wmemcmp. ++ 4. If size is 8 * VEC_SIZE or less, unroll the loop. ++ 5. Compare 4 * VEC_SIZE at a time with the aligned first memory ++ area. ++ 6. Use 2 vector compares when size is 2 * VEC_SIZE or less. ++ 7. Use 4 vector compares when size is 4 * VEC_SIZE or less. ++ 8. Use 8 vector compares when size is 8 * VEC_SIZE or less. */ ++ ++# include <sysdep.h> ++ ++# ifndef MEMCMP ++# define MEMCMP __memcmp_evex_movbe ++# endif ++ ++# define VMOVU vmovdqu64 ++ ++# ifdef USE_AS_WMEMCMP ++# define VPCMPEQ vpcmpeqd ++# else ++# define VPCMPEQ vpcmpeqb ++# endif ++ ++# define XMM1 xmm17 ++# define XMM2 xmm18 ++# define YMM1 ymm17 ++# define YMM2 ymm18 ++# define YMM3 ymm19 ++# define YMM4 ymm20 ++# define YMM5 ymm21 ++# define YMM6 ymm22 ++ ++# define VEC_SIZE 32 ++# ifdef USE_AS_WMEMCMP ++# define VEC_MASK 0xff ++# define XMM_MASK 0xf ++# else ++# define VEC_MASK 0xffffffff ++# define XMM_MASK 0xffff ++# endif ++ ++/* Warning! ++ wmemcmp has to use SIGNED comparison for elements. 
++ memcmp has to use UNSIGNED comparison for elemnts. ++*/ ++ ++ .section .text.evex,"ax",@progbits ++ENTRY (MEMCMP) ++# ifdef USE_AS_WMEMCMP ++ shl $2, %RDX_LP ++# elif defined __ILP32__ ++ /* Clear the upper 32 bits. */ ++ movl %edx, %edx ++# endif ++ cmp $VEC_SIZE, %RDX_LP ++ jb L(less_vec) ++ ++ /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */ ++ VMOVU (%rsi), %YMM2 ++ VPCMPEQ (%rdi), %YMM2, %k1 ++ kmovd %k1, %eax ++ subl $VEC_MASK, %eax ++ jnz L(first_vec) ++ ++ cmpq $(VEC_SIZE * 2), %rdx ++ jbe L(last_vec) ++ ++ /* More than 2 * VEC. */ ++ cmpq $(VEC_SIZE * 8), %rdx ++ ja L(more_8x_vec) ++ cmpq $(VEC_SIZE * 4), %rdx ++ jb L(last_4x_vec) ++ ++ /* From 4 * VEC to 8 * VEC, inclusively. */ ++ VMOVU (%rsi), %YMM1 ++ VPCMPEQ (%rdi), %YMM1, %k1 ++ ++ VMOVU VEC_SIZE(%rsi), %YMM2 ++ VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2 ++ ++ VMOVU (VEC_SIZE * 2)(%rsi), %YMM3 ++ VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3 ++ ++ VMOVU (VEC_SIZE * 3)(%rsi), %YMM4 ++ VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4 ++ ++ kandd %k1, %k2, %k5 ++ kandd %k3, %k4, %k6 ++ kandd %k5, %k6, %k6 ++ ++ kmovd %k6, %eax ++ cmpl $VEC_MASK, %eax ++ jne L(4x_vec_end) ++ ++ leaq -(4 * VEC_SIZE)(%rdi, %rdx), %rdi ++ leaq -(4 * VEC_SIZE)(%rsi, %rdx), %rsi ++ VMOVU (%rsi), %YMM1 ++ VPCMPEQ (%rdi), %YMM1, %k1 ++ ++ VMOVU VEC_SIZE(%rsi), %YMM2 ++ VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2 ++ kandd %k1, %k2, %k5 ++ ++ VMOVU (VEC_SIZE * 2)(%rsi), %YMM3 ++ VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3 ++ kandd %k3, %k5, %k5 ++ ++ VMOVU (VEC_SIZE * 3)(%rsi), %YMM4 ++ VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4 ++ kandd %k4, %k5, %k5 ++ ++ kmovd %k5, %eax ++ cmpl $VEC_MASK, %eax ++ jne L(4x_vec_end) ++ xorl %eax, %eax ++ ret ++ ++ .p2align 4 ++L(last_2x_vec): ++ /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */ ++ VMOVU (%rsi), %YMM2 ++ VPCMPEQ (%rdi), %YMM2, %k2 ++ kmovd %k2, %eax ++ subl $VEC_MASK, %eax ++ jnz L(first_vec) ++ ++L(last_vec): ++ /* Use overlapping loads to avoid branches. 
*/ ++ leaq -VEC_SIZE(%rdi, %rdx), %rdi ++ leaq -VEC_SIZE(%rsi, %rdx), %rsi ++ VMOVU (%rsi), %YMM2 ++ VPCMPEQ (%rdi), %YMM2, %k2 ++ kmovd %k2, %eax ++ subl $VEC_MASK, %eax ++ jnz L(first_vec) ++ ret ++ ++ .p2align 4 ++L(first_vec): ++ /* A byte or int32 is different within 16 or 32 bytes. */ ++ tzcntl %eax, %ecx ++# ifdef USE_AS_WMEMCMP ++ xorl %eax, %eax ++ movl (%rdi, %rcx, 4), %edx ++ cmpl (%rsi, %rcx, 4), %edx ++L(wmemcmp_return): ++ setl %al ++ negl %eax ++ orl $1, %eax ++# else ++ movzbl (%rdi, %rcx), %eax ++ movzbl (%rsi, %rcx), %edx ++ sub %edx, %eax ++# endif ++ ret ++ ++# ifdef USE_AS_WMEMCMP ++ .p2align 4 ++L(4): ++ xorl %eax, %eax ++ movl (%rdi), %edx ++ cmpl (%rsi), %edx ++ jne L(wmemcmp_return) ++ ret ++# else ++ .p2align 4 ++L(between_4_7): ++ /* Load as big endian with overlapping movbe to avoid branches. */ ++ movbe (%rdi), %eax ++ movbe (%rsi), %ecx ++ shlq $32, %rax ++ shlq $32, %rcx ++ movbe -4(%rdi, %rdx), %edi ++ movbe -4(%rsi, %rdx), %esi ++ orq %rdi, %rax ++ orq %rsi, %rcx ++ subq %rcx, %rax ++ je L(exit) ++ sbbl %eax, %eax ++ orl $1, %eax ++ ret ++ ++ .p2align 4 ++L(exit): ++ ret ++ ++ .p2align 4 ++L(between_2_3): ++ /* Load as big endian to avoid branches. */ ++ movzwl (%rdi), %eax ++ movzwl (%rsi), %ecx ++ shll $8, %eax ++ shll $8, %ecx ++ bswap %eax ++ bswap %ecx ++ movb -1(%rdi, %rdx), %al ++ movb -1(%rsi, %rdx), %cl ++ /* Subtraction is okay because the upper 8 bits are zero. */ ++ subl %ecx, %eax ++ ret ++ ++ .p2align 4 ++L(1): ++ movzbl (%rdi), %eax ++ movzbl (%rsi), %ecx ++ subl %ecx, %eax ++ ret ++# endif ++ ++ .p2align 4 ++L(zero): ++ xorl %eax, %eax ++ ret ++ ++ .p2align 4 ++L(less_vec): ++# ifdef USE_AS_WMEMCMP ++ /* It can only be 0, 4, 8, 12, 16, 20, 24, 28 bytes. */ ++ cmpb $4, %dl ++ je L(4) ++ jb L(zero) ++# else ++ cmpb $1, %dl ++ je L(1) ++ jb L(zero) ++ cmpb $4, %dl ++ jb L(between_2_3) ++ cmpb $8, %dl ++ jb L(between_4_7) ++# endif ++ cmpb $16, %dl ++ jae L(between_16_31) ++ /* It is between 8 and 15 bytes. 
*/ ++ vmovq (%rdi), %XMM1 ++ vmovq (%rsi), %XMM2 ++ VPCMPEQ %XMM1, %XMM2, %k2 ++ kmovw %k2, %eax ++ subl $XMM_MASK, %eax ++ jnz L(first_vec) ++ /* Use overlapping loads to avoid branches. */ ++ leaq -8(%rdi, %rdx), %rdi ++ leaq -8(%rsi, %rdx), %rsi ++ vmovq (%rdi), %XMM1 ++ vmovq (%rsi), %XMM2 ++ VPCMPEQ %XMM1, %XMM2, %k2 ++ kmovw %k2, %eax ++ subl $XMM_MASK, %eax ++ jnz L(first_vec) ++ ret ++ ++ .p2align 4 ++L(between_16_31): ++ /* From 16 to 31 bytes. No branch when size == 16. */ ++ VMOVU (%rsi), %XMM2 ++ VPCMPEQ (%rdi), %XMM2, %k2 ++ kmovw %k2, %eax ++ subl $XMM_MASK, %eax ++ jnz L(first_vec) ++ ++ /* Use overlapping loads to avoid branches. */ ++ leaq -16(%rdi, %rdx), %rdi ++ leaq -16(%rsi, %rdx), %rsi ++ VMOVU (%rsi), %XMM2 ++ VPCMPEQ (%rdi), %XMM2, %k2 ++ kmovw %k2, %eax ++ subl $XMM_MASK, %eax ++ jnz L(first_vec) ++ ret ++ ++ .p2align 4 ++L(more_8x_vec): ++ /* More than 8 * VEC. Check the first VEC. */ ++ VMOVU (%rsi), %YMM2 ++ VPCMPEQ (%rdi), %YMM2, %k2 ++ kmovd %k2, %eax ++ subl $VEC_MASK, %eax ++ jnz L(first_vec) ++ ++ /* Align the first memory area for aligned loads in the loop. ++ Compute how much the first memory area is misaligned. */ ++ movq %rdi, %rcx ++ andl $(VEC_SIZE - 1), %ecx ++ /* Get the negative of offset for alignment. */ ++ subq $VEC_SIZE, %rcx ++ /* Adjust the second memory area. */ ++ subq %rcx, %rsi ++ /* Adjust the first memory area which should be aligned now. */ ++ subq %rcx, %rdi ++ /* Adjust length. */ ++ addq %rcx, %rdx ++ ++L(loop_4x_vec): ++ /* Compare 4 * VEC at a time forward. 
*/ ++ VMOVU (%rsi), %YMM1 ++ VPCMPEQ (%rdi), %YMM1, %k1 ++ ++ VMOVU VEC_SIZE(%rsi), %YMM2 ++ VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2 ++ kandd %k2, %k1, %k5 ++ ++ VMOVU (VEC_SIZE * 2)(%rsi), %YMM3 ++ VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3 ++ kandd %k3, %k5, %k5 ++ ++ VMOVU (VEC_SIZE * 3)(%rsi), %YMM4 ++ VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4 ++ kandd %k4, %k5, %k5 ++ ++ kmovd %k5, %eax ++ cmpl $VEC_MASK, %eax ++ jne L(4x_vec_end) ++ ++ addq $(VEC_SIZE * 4), %rdi ++ addq $(VEC_SIZE * 4), %rsi ++ ++ subq $(VEC_SIZE * 4), %rdx ++ cmpq $(VEC_SIZE * 4), %rdx ++ jae L(loop_4x_vec) ++ ++ /* Less than 4 * VEC. */ ++ cmpq $VEC_SIZE, %rdx ++ jbe L(last_vec) ++ cmpq $(VEC_SIZE * 2), %rdx ++ jbe L(last_2x_vec) ++ ++L(last_4x_vec): ++ /* From 2 * VEC to 4 * VEC. */ ++ VMOVU (%rsi), %YMM2 ++ VPCMPEQ (%rdi), %YMM2, %k2 ++ kmovd %k2, %eax ++ subl $VEC_MASK, %eax ++ jnz L(first_vec) ++ ++ addq $VEC_SIZE, %rdi ++ addq $VEC_SIZE, %rsi ++ VMOVU (%rsi), %YMM2 ++ VPCMPEQ (%rdi), %YMM2, %k2 ++ kmovd %k2, %eax ++ subl $VEC_MASK, %eax ++ jnz L(first_vec) ++ ++ /* Use overlapping loads to avoid branches. 
*/ ++ leaq -(3 * VEC_SIZE)(%rdi, %rdx), %rdi ++ leaq -(3 * VEC_SIZE)(%rsi, %rdx), %rsi ++ VMOVU (%rsi), %YMM2 ++ VPCMPEQ (%rdi), %YMM2, %k2 ++ kmovd %k2, %eax ++ subl $VEC_MASK, %eax ++ jnz L(first_vec) ++ ++ addq $VEC_SIZE, %rdi ++ addq $VEC_SIZE, %rsi ++ VMOVU (%rsi), %YMM2 ++ VPCMPEQ (%rdi), %YMM2, %k2 ++ kmovd %k2, %eax ++ subl $VEC_MASK, %eax ++ jnz L(first_vec) ++ ret ++ ++ .p2align 4 ++L(4x_vec_end): ++ kmovd %k1, %eax ++ subl $VEC_MASK, %eax ++ jnz L(first_vec) ++ kmovd %k2, %eax ++ subl $VEC_MASK, %eax ++ jnz L(first_vec_x1) ++ kmovd %k3, %eax ++ subl $VEC_MASK, %eax ++ jnz L(first_vec_x2) ++ kmovd %k4, %eax ++ subl $VEC_MASK, %eax ++ tzcntl %eax, %ecx ++# ifdef USE_AS_WMEMCMP ++ xorl %eax, %eax ++ movl (VEC_SIZE * 3)(%rdi, %rcx, 4), %edx ++ cmpl (VEC_SIZE * 3)(%rsi, %rcx, 4), %edx ++ jmp L(wmemcmp_return) ++# else ++ movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax ++ movzbl (VEC_SIZE * 3)(%rsi, %rcx), %edx ++ sub %edx, %eax ++# endif ++ ret ++ ++ .p2align 4 ++L(first_vec_x1): ++ tzcntl %eax, %ecx ++# ifdef USE_AS_WMEMCMP ++ xorl %eax, %eax ++ movl VEC_SIZE(%rdi, %rcx, 4), %edx ++ cmpl VEC_SIZE(%rsi, %rcx, 4), %edx ++ jmp L(wmemcmp_return) ++# else ++ movzbl VEC_SIZE(%rdi, %rcx), %eax ++ movzbl VEC_SIZE(%rsi, %rcx), %edx ++ sub %edx, %eax ++# endif ++ ret ++ ++ .p2align 4 ++L(first_vec_x2): ++ tzcntl %eax, %ecx ++# ifdef USE_AS_WMEMCMP ++ xorl %eax, %eax ++ movl (VEC_SIZE * 2)(%rdi, %rcx, 4), %edx ++ cmpl (VEC_SIZE * 2)(%rsi, %rcx, 4), %edx ++ jmp L(wmemcmp_return) ++# else ++ movzbl (VEC_SIZE * 2)(%rdi, %rcx), %eax ++ movzbl (VEC_SIZE * 2)(%rsi, %rcx), %edx ++ sub %edx, %eax ++# endif ++ ret ++END (MEMCMP) ++#endif +diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S +new file mode 100644 +index 0000000000..1ec1962e86 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S +@@ -0,0 +1,17 @@ ++#if IS_IN (libc) ++# define VEC_SIZE 32 ++# define VEC(i) ymm##i ++# 
define VMOVNT vmovntdq ++# define VMOVU vmovdqu ++# define VMOVA vmovdqa ++ ++# define ZERO_UPPER_VEC_REGISTERS_RETURN \ ++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST ++ ++# define VZEROUPPER_RETURN jmp L(return) ++ ++# define SECTION(p) p##.avx.rtm ++# define MEMMOVE_SYMBOL(p,s) p##_avx_##s##_rtm ++ ++# include "memmove-vec-unaligned-erms.S" ++#endif +diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S +index aac1515cf6..848848ab39 100644 +--- a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S +@@ -1,11 +1,32 @@ + #if IS_IN (libc) + # define VEC_SIZE 64 +-# define VEC(i) zmm##i ++# define XMM0 xmm16 ++# define XMM1 xmm17 ++# define YMM0 ymm16 ++# define YMM1 ymm17 ++# define VEC0 zmm16 ++# define VEC1 zmm17 ++# define VEC2 zmm18 ++# define VEC3 zmm19 ++# define VEC4 zmm20 ++# define VEC5 zmm21 ++# define VEC6 zmm22 ++# define VEC7 zmm23 ++# define VEC8 zmm24 ++# define VEC9 zmm25 ++# define VEC10 zmm26 ++# define VEC11 zmm27 ++# define VEC12 zmm28 ++# define VEC13 zmm29 ++# define VEC14 zmm30 ++# define VEC15 zmm31 ++# define VEC(i) VEC##i + # define VMOVNT vmovntdq + # define VMOVU vmovdqu64 + # define VMOVA vmovdqa64 ++# define VZEROUPPER + +-# define SECTION(p) p##.avx512 ++# define SECTION(p) p##.evex512 + # define MEMMOVE_SYMBOL(p,s) p##_avx512_##s + + # include "memmove-vec-unaligned-erms.S" +diff --git a/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S +new file mode 100644 +index 0000000000..0cbce8f944 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S +@@ -0,0 +1,33 @@ ++#if IS_IN (libc) ++# define VEC_SIZE 32 ++# define XMM0 xmm16 ++# define XMM1 xmm17 ++# define YMM0 ymm16 ++# define YMM1 ymm17 ++# define VEC0 ymm16 ++# define VEC1 ymm17 ++# define VEC2 ymm18 ++# define VEC3 ymm19 ++# define VEC4 ymm20 ++# define VEC5 ymm21 
++# define VEC6 ymm22 ++# define VEC7 ymm23 ++# define VEC8 ymm24 ++# define VEC9 ymm25 ++# define VEC10 ymm26 ++# define VEC11 ymm27 ++# define VEC12 ymm28 ++# define VEC13 ymm29 ++# define VEC14 ymm30 ++# define VEC15 ymm31 ++# define VEC(i) VEC##i ++# define VMOVNT vmovntdq ++# define VMOVU vmovdqu64 ++# define VMOVA vmovdqa64 ++# define VZEROUPPER ++ ++# define SECTION(p) p##.evex ++# define MEMMOVE_SYMBOL(p,s) p##_evex_##s ++ ++# include "memmove-vec-unaligned-erms.S" ++#endif +diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S +index bd5dc1a3f3..f71c343ecb 100644 +--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S +@@ -48,6 +48,14 @@ + # define MEMMOVE_CHK_SYMBOL(p,s) MEMMOVE_SYMBOL(p, s) + #endif + ++#ifndef XMM0 ++# define XMM0 xmm0 ++#endif ++ ++#ifndef YMM0 ++# define YMM0 ymm0 ++#endif ++ + #ifndef VZEROUPPER + # if VEC_SIZE > 16 + # define VZEROUPPER vzeroupper +@@ -56,6 +64,13 @@ + # endif + #endif + ++/* Avoid short distance rep movsb only with non-SSE vector. 
*/ ++#ifndef AVOID_SHORT_DISTANCE_REP_MOVSB ++# define AVOID_SHORT_DISTANCE_REP_MOVSB (VEC_SIZE > 16) ++#else ++# define AVOID_SHORT_DISTANCE_REP_MOVSB 0 ++#endif ++ + #ifndef PREFETCH + # define PREFETCH(addr) prefetcht0 addr + #endif +@@ -132,11 +147,12 @@ L(last_2x_vec): + VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(1) + VMOVU %VEC(0), (%rdi) + VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx) +- VZEROUPPER + #if !defined USE_MULTIARCH || !IS_IN (libc) + L(nop): +-#endif + ret ++#else ++ VZEROUPPER_RETURN ++#endif + #if defined USE_MULTIARCH && IS_IN (libc) + END (MEMMOVE_SYMBOL (__memmove, unaligned)) + +@@ -229,8 +245,11 @@ L(last_2x_vec): + VMOVU %VEC(0), (%rdi) + VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx) + L(return): +- VZEROUPPER ++#if VEC_SIZE > 16 ++ ZERO_UPPER_VEC_REGISTERS_RETURN ++#else + ret ++#endif + + L(movsb): + cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP +@@ -243,7 +262,21 @@ L(movsb): + cmpq %r9, %rdi + /* Avoid slow backward REP MOVSB. */ + jb L(more_8x_vec_backward) ++# if AVOID_SHORT_DISTANCE_REP_MOVSB ++ movq %rdi, %rcx ++ subq %rsi, %rcx ++ jmp 2f ++# endif + 1: ++# if AVOID_SHORT_DISTANCE_REP_MOVSB ++ movq %rsi, %rcx ++ subq %rdi, %rcx ++2: ++/* Avoid "rep movsb" if RCX, the distance between source and destination, ++ is N*4GB + [1..63] with N >= 0. */ ++ cmpl $63, %ecx ++ jbe L(more_2x_vec) /* Avoid "rep movsb" if ECX <= 63. */ ++# endif + mov %RDX_LP, %RCX_LP + rep movsb + L(nop): +@@ -277,21 +310,20 @@ L(less_vec): + #if VEC_SIZE > 32 + L(between_32_63): + /* From 32 to 63. No branch when size == 32. */ +- vmovdqu (%rsi), %ymm0 +- vmovdqu -32(%rsi,%rdx), %ymm1 +- vmovdqu %ymm0, (%rdi) +- vmovdqu %ymm1, -32(%rdi,%rdx) +- VZEROUPPER +- ret ++ VMOVU (%rsi), %YMM0 ++ VMOVU -32(%rsi,%rdx), %YMM1 ++ VMOVU %YMM0, (%rdi) ++ VMOVU %YMM1, -32(%rdi,%rdx) ++ VZEROUPPER_RETURN + #endif + #if VEC_SIZE > 16 + /* From 16 to 31. No branch when size == 16. 
*/ + L(between_16_31): +- vmovdqu (%rsi), %xmm0 +- vmovdqu -16(%rsi,%rdx), %xmm1 +- vmovdqu %xmm0, (%rdi) +- vmovdqu %xmm1, -16(%rdi,%rdx) +- ret ++ VMOVU (%rsi), %XMM0 ++ VMOVU -16(%rsi,%rdx), %XMM1 ++ VMOVU %XMM0, (%rdi) ++ VMOVU %XMM1, -16(%rdi,%rdx) ++ VZEROUPPER_RETURN + #endif + L(between_8_15): + /* From 8 to 15. No branch when size == 8. */ +@@ -344,8 +376,7 @@ L(more_2x_vec): + VMOVU %VEC(5), -(VEC_SIZE * 2)(%rdi,%rdx) + VMOVU %VEC(6), -(VEC_SIZE * 3)(%rdi,%rdx) + VMOVU %VEC(7), -(VEC_SIZE * 4)(%rdi,%rdx) +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + L(last_4x_vec): + /* Copy from 2 * VEC to 4 * VEC. */ + VMOVU (%rsi), %VEC(0) +@@ -356,8 +387,7 @@ L(last_4x_vec): + VMOVU %VEC(1), VEC_SIZE(%rdi) + VMOVU %VEC(2), -VEC_SIZE(%rdi,%rdx) + VMOVU %VEC(3), -(VEC_SIZE * 2)(%rdi,%rdx) +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + L(more_8x_vec): + cmpq %rsi, %rdi +@@ -413,8 +443,7 @@ L(loop_4x_vec_forward): + VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx) + /* Store the first VEC. */ + VMOVU %VEC(4), (%r11) +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + L(more_8x_vec_backward): + /* Load the first 4 * VEC and last VEC to support overlapping +@@ -465,8 +494,7 @@ L(loop_4x_vec_backward): + VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi) + /* Store the last VEC. */ + VMOVU %VEC(8), (%r11) +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc) + L(large_forward): +@@ -501,8 +529,7 @@ L(loop_large_forward): + VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx) + /* Store the first VEC. */ + VMOVU %VEC(4), (%r11) +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + L(large_backward): + /* Don't use non-temporal store if there is overlap between +@@ -536,8 +563,7 @@ L(loop_large_backward): + VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi) + /* Store the last VEC. 
*/ + VMOVU %VEC(8), (%r11) +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + #endif + END (MEMMOVE_SYMBOL (__memmove, unaligned_erms)) + +diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S +new file mode 100644 +index 0000000000..cea2d2a72d +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S +@@ -0,0 +1,12 @@ ++#ifndef MEMRCHR ++# define MEMRCHR __memrchr_avx2_rtm ++#endif ++ ++#define ZERO_UPPER_VEC_REGISTERS_RETURN \ ++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST ++ ++#define VZEROUPPER_RETURN jmp L(return_vzeroupper) ++ ++#define SECTION(p) p##.avx.rtm ++ ++#include "memrchr-avx2.S" +diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S +index f5437b54de..c8d54c08d6 100644 +--- a/sysdeps/x86_64/multiarch/memrchr-avx2.S ++++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S +@@ -20,14 +20,22 @@ + + # include <sysdep.h> + ++# ifndef MEMRCHR ++# define MEMRCHR __memrchr_avx2 ++# endif ++ + # ifndef VZEROUPPER + # define VZEROUPPER vzeroupper + # endif + ++# ifndef SECTION ++# define SECTION(p) p##.avx ++# endif ++ + # define VEC_SIZE 32 + +- .section .text.avx,"ax",@progbits +-ENTRY (__memrchr_avx2) ++ .section SECTION(.text),"ax",@progbits ++ENTRY (MEMRCHR) + /* Broadcast CHAR to YMM0. 
*/ + vmovd %esi, %xmm0 + vpbroadcastb %xmm0, %ymm0 +@@ -134,8 +142,8 @@ L(loop_4x_vec): + vpmovmskb %ymm1, %eax + bsrl %eax, %eax + addq %rdi, %rax +- VZEROUPPER +- ret ++L(return_vzeroupper): ++ ZERO_UPPER_VEC_REGISTERS_RETURN + + .p2align 4 + L(last_4x_vec_or_less): +@@ -169,8 +177,7 @@ L(last_4x_vec_or_less): + addq %rax, %rdx + jl L(zero) + addq %rdi, %rax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(last_2x_vec): +@@ -191,31 +198,27 @@ L(last_2x_vec): + jl L(zero) + addl $(VEC_SIZE * 2), %eax + addq %rdi, %rax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(last_vec_x0): + bsrl %eax, %eax + addq %rdi, %rax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(last_vec_x1): + bsrl %eax, %eax + addl $VEC_SIZE, %eax + addq %rdi, %rax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(last_vec_x2): + bsrl %eax, %eax + addl $(VEC_SIZE * 2), %eax + addq %rdi, %rax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(last_vec_x3): +@@ -232,8 +235,7 @@ L(last_vec_x1_check): + jl L(zero) + addl $VEC_SIZE, %eax + addq %rdi, %rax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(last_vec_x3_check): +@@ -243,12 +245,14 @@ L(last_vec_x3_check): + jl L(zero) + addl $(VEC_SIZE * 3), %eax + addq %rdi, %rax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(zero): +- VZEROUPPER ++ xorl %eax, %eax ++ VZEROUPPER_RETURN ++ ++ .p2align 4 + L(null): + xorl %eax, %eax + ret +@@ -273,8 +277,7 @@ L(last_vec_or_less_aligned): + + bsrl %eax, %eax + addq %rdi, %rax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(last_vec_or_less): +@@ -315,8 +318,7 @@ L(last_vec_or_less): + bsrl %eax, %eax + addq %rdi, %rax + addq %r8, %rax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(last_vec_2x_aligned): +@@ -353,7 +355,6 @@ L(last_vec_2x_aligned): + bsrl %eax, %eax + addq %rdi, %rax + addq %r8, %rax +- VZEROUPPER +- ret +-END (__memrchr_avx2) ++ VZEROUPPER_RETURN ++END (MEMRCHR) + #endif +diff --git 
a/sysdeps/x86_64/multiarch/memrchr-evex.S b/sysdeps/x86_64/multiarch/memrchr-evex.S +new file mode 100644 +index 0000000000..16bf8e02b1 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/memrchr-evex.S +@@ -0,0 +1,337 @@ ++/* memrchr optimized with 256-bit EVEX instructions. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#if IS_IN (libc) ++ ++# include <sysdep.h> ++ ++# define VMOVA vmovdqa64 ++ ++# define YMMMATCH ymm16 ++ ++# define VEC_SIZE 32 ++ ++ .section .text.evex,"ax",@progbits ++ENTRY (__memrchr_evex) ++ /* Broadcast CHAR to YMMMATCH. */ ++ vpbroadcastb %esi, %YMMMATCH ++ ++ sub $VEC_SIZE, %RDX_LP ++ jbe L(last_vec_or_less) ++ ++ add %RDX_LP, %RDI_LP ++ ++ /* Check the last VEC_SIZE bytes. */ ++ vpcmpb $0, (%rdi), %YMMMATCH, %k1 ++ kmovd %k1, %eax ++ testl %eax, %eax ++ jnz L(last_vec_x0) ++ ++ subq $(VEC_SIZE * 4), %rdi ++ movl %edi, %ecx ++ andl $(VEC_SIZE - 1), %ecx ++ jz L(aligned_more) ++ ++ /* Align data for aligned loads in the loop. */ ++ addq $VEC_SIZE, %rdi ++ addq $VEC_SIZE, %rdx ++ andq $-VEC_SIZE, %rdi ++ subq %rcx, %rdx ++ ++ .p2align 4 ++L(aligned_more): ++ subq $(VEC_SIZE * 4), %rdx ++ jbe L(last_4x_vec_or_less) ++ ++ /* Check the last 4 * VEC_SIZE. 
Only one VEC_SIZE at a time ++ since data is only aligned to VEC_SIZE. */ ++ vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1 ++ kmovd %k1, %eax ++ testl %eax, %eax ++ jnz L(last_vec_x3) ++ ++ vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k2 ++ kmovd %k2, %eax ++ testl %eax, %eax ++ jnz L(last_vec_x2) ++ ++ vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k3 ++ kmovd %k3, %eax ++ testl %eax, %eax ++ jnz L(last_vec_x1) ++ ++ vpcmpb $0, (%rdi), %YMMMATCH, %k4 ++ kmovd %k4, %eax ++ testl %eax, %eax ++ jnz L(last_vec_x0) ++ ++ /* Align data to 4 * VEC_SIZE for loop with fewer branches. ++ There are some overlaps with above if data isn't aligned ++ to 4 * VEC_SIZE. */ ++ movl %edi, %ecx ++ andl $(VEC_SIZE * 4 - 1), %ecx ++ jz L(loop_4x_vec) ++ ++ addq $(VEC_SIZE * 4), %rdi ++ addq $(VEC_SIZE * 4), %rdx ++ andq $-(VEC_SIZE * 4), %rdi ++ subq %rcx, %rdx ++ ++ .p2align 4 ++L(loop_4x_vec): ++ /* Compare 4 * VEC at a time forward. */ ++ subq $(VEC_SIZE * 4), %rdi ++ subq $(VEC_SIZE * 4), %rdx ++ jbe L(last_4x_vec_or_less) ++ ++ vpcmpb $0, (%rdi), %YMMMATCH, %k1 ++ vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k2 ++ kord %k1, %k2, %k5 ++ vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k3 ++ vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k4 ++ ++ kord %k3, %k4, %k6 ++ kortestd %k5, %k6 ++ jz L(loop_4x_vec) ++ ++ /* There is a match. 
*/ ++ kmovd %k4, %eax ++ testl %eax, %eax ++ jnz L(last_vec_x3) ++ ++ kmovd %k3, %eax ++ testl %eax, %eax ++ jnz L(last_vec_x2) ++ ++ kmovd %k2, %eax ++ testl %eax, %eax ++ jnz L(last_vec_x1) ++ ++ kmovd %k1, %eax ++ bsrl %eax, %eax ++ addq %rdi, %rax ++ ret ++ ++ .p2align 4 ++L(last_4x_vec_or_less): ++ addl $(VEC_SIZE * 4), %edx ++ cmpl $(VEC_SIZE * 2), %edx ++ jbe L(last_2x_vec) ++ ++ vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1 ++ kmovd %k1, %eax ++ testl %eax, %eax ++ jnz L(last_vec_x3) ++ ++ vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k2 ++ kmovd %k2, %eax ++ testl %eax, %eax ++ jnz L(last_vec_x2) ++ ++ vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k3 ++ kmovd %k3, %eax ++ testl %eax, %eax ++ jnz L(last_vec_x1_check) ++ cmpl $(VEC_SIZE * 3), %edx ++ jbe L(zero) ++ ++ vpcmpb $0, (%rdi), %YMMMATCH, %k4 ++ kmovd %k4, %eax ++ testl %eax, %eax ++ jz L(zero) ++ bsrl %eax, %eax ++ subq $(VEC_SIZE * 4), %rdx ++ addq %rax, %rdx ++ jl L(zero) ++ addq %rdi, %rax ++ ret ++ ++ .p2align 4 ++L(last_2x_vec): ++ vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1 ++ kmovd %k1, %eax ++ testl %eax, %eax ++ jnz L(last_vec_x3_check) ++ cmpl $VEC_SIZE, %edx ++ jbe L(zero) ++ ++ vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k1 ++ kmovd %k1, %eax ++ testl %eax, %eax ++ jz L(zero) ++ bsrl %eax, %eax ++ subq $(VEC_SIZE * 2), %rdx ++ addq %rax, %rdx ++ jl L(zero) ++ addl $(VEC_SIZE * 2), %eax ++ addq %rdi, %rax ++ ret ++ ++ .p2align 4 ++L(last_vec_x0): ++ bsrl %eax, %eax ++ addq %rdi, %rax ++ ret ++ ++ .p2align 4 ++L(last_vec_x1): ++ bsrl %eax, %eax ++ addl $VEC_SIZE, %eax ++ addq %rdi, %rax ++ ret ++ ++ .p2align 4 ++L(last_vec_x2): ++ bsrl %eax, %eax ++ addl $(VEC_SIZE * 2), %eax ++ addq %rdi, %rax ++ ret ++ ++ .p2align 4 ++L(last_vec_x3): ++ bsrl %eax, %eax ++ addl $(VEC_SIZE * 3), %eax ++ addq %rdi, %rax ++ ret ++ ++ .p2align 4 ++L(last_vec_x1_check): ++ bsrl %eax, %eax ++ subq $(VEC_SIZE * 3), %rdx ++ addq %rax, %rdx ++ jl L(zero) ++ addl $VEC_SIZE, %eax ++ addq %rdi, %rax ++ ret ++ ++ 
.p2align 4 ++L(last_vec_x3_check): ++ bsrl %eax, %eax ++ subq $VEC_SIZE, %rdx ++ addq %rax, %rdx ++ jl L(zero) ++ addl $(VEC_SIZE * 3), %eax ++ addq %rdi, %rax ++ ret ++ ++ .p2align 4 ++L(zero): ++ xorl %eax, %eax ++ ret ++ ++ .p2align 4 ++L(last_vec_or_less_aligned): ++ movl %edx, %ecx ++ ++ vpcmpb $0, (%rdi), %YMMMATCH, %k1 ++ ++ movl $1, %edx ++ /* Support rdx << 32. */ ++ salq %cl, %rdx ++ subq $1, %rdx ++ ++ kmovd %k1, %eax ++ ++ /* Remove the trailing bytes. */ ++ andl %edx, %eax ++ testl %eax, %eax ++ jz L(zero) ++ ++ bsrl %eax, %eax ++ addq %rdi, %rax ++ ret ++ ++ .p2align 4 ++L(last_vec_or_less): ++ addl $VEC_SIZE, %edx ++ ++ /* Check for zero length. */ ++ testl %edx, %edx ++ jz L(zero) ++ ++ movl %edi, %ecx ++ andl $(VEC_SIZE - 1), %ecx ++ jz L(last_vec_or_less_aligned) ++ ++ movl %ecx, %esi ++ movl %ecx, %r8d ++ addl %edx, %esi ++ andq $-VEC_SIZE, %rdi ++ ++ subl $VEC_SIZE, %esi ++ ja L(last_vec_2x_aligned) ++ ++ /* Check the last VEC. */ ++ vpcmpb $0, (%rdi), %YMMMATCH, %k1 ++ kmovd %k1, %eax ++ ++ /* Remove the leading and trailing bytes. */ ++ sarl %cl, %eax ++ movl %edx, %ecx ++ ++ movl $1, %edx ++ sall %cl, %edx ++ subl $1, %edx ++ ++ andl %edx, %eax ++ testl %eax, %eax ++ jz L(zero) ++ ++ bsrl %eax, %eax ++ addq %rdi, %rax ++ addq %r8, %rax ++ ret ++ ++ .p2align 4 ++L(last_vec_2x_aligned): ++ movl %esi, %ecx ++ ++ /* Check the last VEC. */ ++ vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k1 ++ ++ movl $1, %edx ++ sall %cl, %edx ++ subl $1, %edx ++ ++ kmovd %k1, %eax ++ ++ /* Remove the trailing bytes. */ ++ andl %edx, %eax ++ ++ testl %eax, %eax ++ jnz L(last_vec_x1) ++ ++ /* Check the second last VEC. */ ++ vpcmpb $0, (%rdi), %YMMMATCH, %k1 ++ ++ movl %r8d, %ecx ++ ++ kmovd %k1, %eax ++ ++ /* Remove the leading bytes. Must use unsigned right shift for ++ bsrl below. 
*/ ++ shrl %cl, %eax ++ testl %eax, %eax ++ jz L(zero) ++ ++ bsrl %eax, %eax ++ addq %rdi, %rax ++ addq %r8, %rax ++ ret ++END (__memrchr_evex) ++#endif +diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S +new file mode 100644 +index 0000000000..8ac3e479bb +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S +@@ -0,0 +1,10 @@ ++#define ZERO_UPPER_VEC_REGISTERS_RETURN \ ++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST ++ ++#define VZEROUPPER_RETURN jmp L(return) ++ ++#define SECTION(p) p##.avx.rtm ++#define MEMSET_SYMBOL(p,s) p##_avx2_##s##_rtm ++#define WMEMSET_SYMBOL(p,s) p##_avx2_##s##_rtm ++ ++#include "memset-avx2-unaligned-erms.S" +diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S +index 7ab3d89849..ae0860f36a 100644 +--- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S +@@ -14,9 +14,15 @@ + movq r, %rax; \ + vpbroadcastd %xmm0, %ymm0 + +-# define SECTION(p) p##.avx +-# define MEMSET_SYMBOL(p,s) p##_avx2_##s +-# define WMEMSET_SYMBOL(p,s) p##_avx2_##s ++# ifndef SECTION ++# define SECTION(p) p##.avx ++# endif ++# ifndef MEMSET_SYMBOL ++# define MEMSET_SYMBOL(p,s) p##_avx2_##s ++# endif ++# ifndef WMEMSET_SYMBOL ++# define WMEMSET_SYMBOL(p,s) p##_avx2_##s ++# endif + + # include "memset-vec-unaligned-erms.S" + #endif +diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S +index 0783979ca5..22e7b187c8 100644 +--- a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S +@@ -1,22 +1,22 @@ + #if IS_IN (libc) + # define VEC_SIZE 64 +-# define VEC(i) zmm##i ++# define XMM0 xmm16 ++# define YMM0 ymm16 ++# define VEC0 zmm16 ++# define VEC(i) VEC##i + # define VMOVU vmovdqu64 + # define VMOVA 
vmovdqa64 ++# define VZEROUPPER + + # define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ +- vmovd d, %xmm0; \ + movq r, %rax; \ +- vpbroadcastb %xmm0, %xmm0; \ +- vpbroadcastq %xmm0, %zmm0 ++ vpbroadcastb d, %VEC0 + + # define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ +- vmovd d, %xmm0; \ + movq r, %rax; \ +- vpbroadcastd %xmm0, %xmm0; \ +- vpbroadcastq %xmm0, %zmm0 ++ vpbroadcastd d, %VEC0 + +-# define SECTION(p) p##.avx512 ++# define SECTION(p) p##.evex512 + # define MEMSET_SYMBOL(p,s) p##_avx512_##s + # define WMEMSET_SYMBOL(p,s) p##_avx512_##s + +diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S +new file mode 100644 +index 0000000000..ae0a4d6e46 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S +@@ -0,0 +1,24 @@ ++#if IS_IN (libc) ++# define VEC_SIZE 32 ++# define XMM0 xmm16 ++# define YMM0 ymm16 ++# define VEC0 ymm16 ++# define VEC(i) VEC##i ++# define VMOVU vmovdqu64 ++# define VMOVA vmovdqa64 ++# define VZEROUPPER ++ ++# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ ++ movq r, %rax; \ ++ vpbroadcastb d, %VEC0 ++ ++# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ ++ movq r, %rax; \ ++ vpbroadcastd d, %VEC0 ++ ++# define SECTION(p) p##.evex ++# define MEMSET_SYMBOL(p,s) p##_evex_##s ++# define WMEMSET_SYMBOL(p,s) p##_evex_##s ++ ++# include "memset-vec-unaligned-erms.S" ++#endif +diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +index 2bfc95de05..de5a8a38f5 100644 +--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +@@ -34,20 +34,25 @@ + # define WMEMSET_CHK_SYMBOL(p,s) WMEMSET_SYMBOL(p, s) + #endif + ++#ifndef XMM0 ++# define XMM0 xmm0 ++#endif ++ ++#ifndef YMM0 ++# define YMM0 ymm0 ++#endif ++ + #ifndef VZEROUPPER + # if VEC_SIZE > 16 + # define VZEROUPPER vzeroupper ++# define VZEROUPPER_SHORT_RETURN vzeroupper; 
ret + # else + # define VZEROUPPER + # endif + #endif + + #ifndef VZEROUPPER_SHORT_RETURN +-# if VEC_SIZE > 16 +-# define VZEROUPPER_SHORT_RETURN vzeroupper +-# else +-# define VZEROUPPER_SHORT_RETURN rep +-# endif ++# define VZEROUPPER_SHORT_RETURN rep; ret + #endif + + #ifndef MOVQ +@@ -67,7 +72,7 @@ + ENTRY (__bzero) + mov %RDI_LP, %RAX_LP /* Set return value. */ + mov %RSI_LP, %RDX_LP /* Set n. */ +- pxor %xmm0, %xmm0 ++ pxor %XMM0, %XMM0 + jmp L(entry_from_bzero) + END (__bzero) + weak_alias (__bzero, bzero) +@@ -109,8 +114,7 @@ L(entry_from_bzero): + /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */ + VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx) + VMOVU %VEC(0), (%rdi) +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + #if defined USE_MULTIARCH && IS_IN (libc) + END (MEMSET_SYMBOL (__memset, unaligned)) + +@@ -133,14 +137,12 @@ ENTRY (__memset_erms) + ENTRY (MEMSET_SYMBOL (__memset, erms)) + # endif + L(stosb): +- /* Issue vzeroupper before rep stosb. */ +- VZEROUPPER + mov %RDX_LP, %RCX_LP + movzbl %sil, %eax + mov %RDI_LP, %RDX_LP + rep stosb + mov %RDX_LP, %RAX_LP +- ret ++ VZEROUPPER_RETURN + # if VEC_SIZE == 16 + END (__memset_erms) + # else +@@ -167,8 +169,7 @@ ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms)) + /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */ + VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx) + VMOVU %VEC(0), (%rdi) +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + L(stosb_more_2x_vec): + cmp __x86_rep_stosb_threshold(%rip), %RDX_LP +@@ -182,8 +183,11 @@ L(more_2x_vec): + VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx) + VMOVU %VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx) + L(return): +- VZEROUPPER ++#if VEC_SIZE > 16 ++ ZERO_UPPER_VEC_REGISTERS_RETURN ++#else + ret ++#endif + + L(loop_start): + leaq (VEC_SIZE * 4)(%rdi), %rcx +@@ -209,7 +213,6 @@ L(loop): + cmpq %rcx, %rdx + jne L(loop) + VZEROUPPER_SHORT_RETURN +- ret + L(less_vec): + /* Less than 1 VEC. 
*/ + # if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64 +@@ -223,7 +226,7 @@ L(less_vec): + cmpb $16, %dl + jae L(between_16_31) + # endif +- MOVQ %xmm0, %rcx ++ MOVQ %XMM0, %rcx + cmpb $8, %dl + jae L(between_8_15) + cmpb $4, %dl +@@ -233,40 +236,34 @@ L(less_vec): + jb 1f + movb %cl, (%rdi) + 1: +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + # if VEC_SIZE > 32 + /* From 32 to 63. No branch when size == 32. */ + L(between_32_63): +- vmovdqu %ymm0, -32(%rdi,%rdx) +- vmovdqu %ymm0, (%rdi) +- VZEROUPPER +- ret ++ VMOVU %YMM0, -32(%rdi,%rdx) ++ VMOVU %YMM0, (%rdi) ++ VZEROUPPER_RETURN + # endif + # if VEC_SIZE > 16 + /* From 16 to 31. No branch when size == 16. */ + L(between_16_31): +- vmovdqu %xmm0, -16(%rdi,%rdx) +- vmovdqu %xmm0, (%rdi) +- VZEROUPPER +- ret ++ VMOVU %XMM0, -16(%rdi,%rdx) ++ VMOVU %XMM0, (%rdi) ++ VZEROUPPER_RETURN + # endif + /* From 8 to 15. No branch when size == 8. */ + L(between_8_15): + movq %rcx, -8(%rdi,%rdx) + movq %rcx, (%rdi) +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + L(between_4_7): + /* From 4 to 7. No branch when size == 4. */ + movl %ecx, -4(%rdi,%rdx) + movl %ecx, (%rdi) +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + L(between_2_3): + /* From 2 to 3. No branch when size == 2. 
*/ + movw %cx, -2(%rdi,%rdx) + movw %cx, (%rdi) +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + END (MEMSET_SYMBOL (__memset, unaligned_erms)) +diff --git a/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S +new file mode 100644 +index 0000000000..acc5f6e2fb +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S +@@ -0,0 +1,4 @@ ++#define MEMCHR __rawmemchr_avx2_rtm ++#define USE_AS_RAWMEMCHR 1 ++ ++#include "memchr-avx2-rtm.S" +diff --git a/sysdeps/x86_64/multiarch/rawmemchr-evex.S b/sysdeps/x86_64/multiarch/rawmemchr-evex.S +new file mode 100644 +index 0000000000..ec942b77ba +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/rawmemchr-evex.S +@@ -0,0 +1,4 @@ ++#define MEMCHR __rawmemchr_evex ++#define USE_AS_RAWMEMCHR 1 ++ ++#include "memchr-evex.S" +diff --git a/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S +new file mode 100644 +index 0000000000..2b9c07a59f +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S +@@ -0,0 +1,3 @@ ++#define USE_AS_STPCPY ++#define STRCPY __stpcpy_avx2_rtm ++#include "strcpy-avx2-rtm.S" +diff --git a/sysdeps/x86_64/multiarch/stpcpy-evex.S b/sysdeps/x86_64/multiarch/stpcpy-evex.S +new file mode 100644 +index 0000000000..7c6f26cd98 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/stpcpy-evex.S +@@ -0,0 +1,3 @@ ++#define USE_AS_STPCPY ++#define STRCPY __stpcpy_evex ++#include "strcpy-evex.S" +diff --git a/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S +new file mode 100644 +index 0000000000..60a2ccfe53 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S +@@ -0,0 +1,4 @@ ++#define USE_AS_STPCPY ++#define USE_AS_STRNCPY ++#define STRCPY __stpncpy_avx2_rtm ++#include "strcpy-avx2-rtm.S" +diff --git a/sysdeps/x86_64/multiarch/stpncpy-evex.S b/sysdeps/x86_64/multiarch/stpncpy-evex.S +new file mode 100644 +index 0000000000..1570014d1c +--- /dev/null ++++ 
b/sysdeps/x86_64/multiarch/stpncpy-evex.S +@@ -0,0 +1,4 @@ ++#define USE_AS_STPCPY ++#define USE_AS_STRNCPY ++#define STRCPY __stpncpy_evex ++#include "strcpy-evex.S" +diff --git a/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S +new file mode 100644 +index 0000000000..637fb557c4 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S +@@ -0,0 +1,12 @@ ++#ifndef STRCAT ++# define STRCAT __strcat_avx2_rtm ++#endif ++ ++#define ZERO_UPPER_VEC_REGISTERS_RETURN \ ++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST ++ ++#define VZEROUPPER_RETURN jmp L(return_vzeroupper) ++ ++#define SECTION(p) p##.avx.rtm ++ ++#include "strcat-avx2.S" +diff --git a/sysdeps/x86_64/multiarch/strcat-avx2.S b/sysdeps/x86_64/multiarch/strcat-avx2.S +index a4143bf8f5..1e6d4827ee 100644 +--- a/sysdeps/x86_64/multiarch/strcat-avx2.S ++++ b/sysdeps/x86_64/multiarch/strcat-avx2.S +@@ -30,7 +30,11 @@ + /* Number of bytes in a vector register */ + # define VEC_SIZE 32 + +- .section .text.avx,"ax",@progbits ++# ifndef SECTION ++# define SECTION(p) p##.avx ++# endif ++ ++ .section SECTION(.text),"ax",@progbits + ENTRY (STRCAT) + mov %rdi, %r9 + # ifdef USE_AS_STRNCAT +diff --git a/sysdeps/x86_64/multiarch/strcat-evex.S b/sysdeps/x86_64/multiarch/strcat-evex.S +new file mode 100644 +index 0000000000..97c3d85b6d +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strcat-evex.S +@@ -0,0 +1,283 @@ ++/* strcat with 256-bit EVEX instructions. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#if IS_IN (libc) ++ ++# include <sysdep.h> ++ ++# ifndef STRCAT ++# define STRCAT __strcat_evex ++# endif ++ ++# define VMOVU vmovdqu64 ++# define VMOVA vmovdqa64 ++ ++/* zero register */ ++# define XMMZERO xmm16 ++# define YMMZERO ymm16 ++# define YMM0 ymm17 ++# define YMM1 ymm18 ++ ++# define USE_AS_STRCAT ++ ++/* Number of bytes in a vector register */ ++# define VEC_SIZE 32 ++ ++ .section .text.evex,"ax",@progbits ++ENTRY (STRCAT) ++ mov %rdi, %r9 ++# ifdef USE_AS_STRNCAT ++ mov %rdx, %r8 ++# endif ++ ++ xor %eax, %eax ++ mov %edi, %ecx ++ and $((VEC_SIZE * 4) - 1), %ecx ++ vpxorq %XMMZERO, %XMMZERO, %XMMZERO ++ cmp $(VEC_SIZE * 3), %ecx ++ ja L(fourth_vector_boundary) ++ vpcmpb $0, (%rdi), %YMMZERO, %k0 ++ kmovd %k0, %edx ++ test %edx, %edx ++ jnz L(exit_null_on_first_vector) ++ mov %rdi, %rax ++ and $-VEC_SIZE, %rax ++ jmp L(align_vec_size_start) ++L(fourth_vector_boundary): ++ mov %rdi, %rax ++ and $-VEC_SIZE, %rax ++ vpcmpb $0, (%rax), %YMMZERO, %k0 ++ mov $-1, %r10d ++ sub %rax, %rcx ++ shl %cl, %r10d ++ kmovd %k0, %edx ++ and %r10d, %edx ++ jnz L(exit) ++ ++L(align_vec_size_start): ++ vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0 ++ kmovd %k0, %edx ++ test %edx, %edx ++ jnz L(exit_null_on_second_vector) ++ ++ vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1 ++ kmovd %k1, %edx ++ test %edx, %edx ++ jnz L(exit_null_on_third_vector) ++ ++ vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2 ++ kmovd %k2, %edx ++ test %edx, %edx ++ jnz L(exit_null_on_fourth_vector) ++ ++ vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3 ++ kmovd %k3, %edx ++ test %edx, 
%edx ++ jnz L(exit_null_on_fifth_vector) ++ ++ vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4 ++ add $(VEC_SIZE * 4), %rax ++ kmovd %k4, %edx ++ test %edx, %edx ++ jnz L(exit_null_on_second_vector) ++ ++ vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1 ++ kmovd %k1, %edx ++ test %edx, %edx ++ jnz L(exit_null_on_third_vector) ++ ++ vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2 ++ kmovd %k2, %edx ++ test %edx, %edx ++ jnz L(exit_null_on_fourth_vector) ++ ++ vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3 ++ kmovd %k3, %edx ++ test %edx, %edx ++ jnz L(exit_null_on_fifth_vector) ++ ++ vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4 ++ kmovd %k4, %edx ++ add $(VEC_SIZE * 4), %rax ++ test %edx, %edx ++ jnz L(exit_null_on_second_vector) ++ ++ vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1 ++ kmovd %k1, %edx ++ test %edx, %edx ++ jnz L(exit_null_on_third_vector) ++ ++ vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2 ++ kmovd %k2, %edx ++ test %edx, %edx ++ jnz L(exit_null_on_fourth_vector) ++ ++ vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3 ++ kmovd %k3, %edx ++ test %edx, %edx ++ jnz L(exit_null_on_fifth_vector) ++ ++ vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4 ++ add $(VEC_SIZE * 4), %rax ++ kmovd %k4, %edx ++ test %edx, %edx ++ jnz L(exit_null_on_second_vector) ++ ++ vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1 ++ kmovd %k1, %edx ++ test %edx, %edx ++ jnz L(exit_null_on_third_vector) ++ ++ vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2 ++ kmovd %k2, %edx ++ test %edx, %edx ++ jnz L(exit_null_on_fourth_vector) ++ ++ vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3 ++ kmovd %k3, %edx ++ test %edx, %edx ++ jnz L(exit_null_on_fifth_vector) ++ ++ test $((VEC_SIZE * 4) - 1), %rax ++ jz L(align_four_vec_loop) ++ ++ vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4 ++ add $(VEC_SIZE * 5), %rax ++ kmovd %k4, %edx ++ test %edx, %edx ++ jnz L(exit) ++ ++ test $((VEC_SIZE * 4) - 1), %rax ++ jz L(align_four_vec_loop) ++ ++ vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0 ++ add $VEC_SIZE, %rax 
++ kmovd %k0, %edx ++ test %edx, %edx ++ jnz L(exit) ++ ++ test $((VEC_SIZE * 4) - 1), %rax ++ jz L(align_four_vec_loop) ++ ++ vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0 ++ add $VEC_SIZE, %rax ++ kmovd %k0, %edx ++ test %edx, %edx ++ jnz L(exit) ++ ++ test $((VEC_SIZE * 4) - 1), %rax ++ jz L(align_four_vec_loop) ++ ++ vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k1 ++ add $VEC_SIZE, %rax ++ kmovd %k1, %edx ++ test %edx, %edx ++ jnz L(exit) ++ ++ add $VEC_SIZE, %rax ++ ++ .p2align 4 ++L(align_four_vec_loop): ++ VMOVA (%rax), %YMM0 ++ VMOVA (VEC_SIZE * 2)(%rax), %YMM1 ++ vpminub VEC_SIZE(%rax), %YMM0, %YMM0 ++ vpminub (VEC_SIZE * 3)(%rax), %YMM1, %YMM1 ++ vpminub %YMM0, %YMM1, %YMM0 ++ /* If K0 != 0, there is a null byte. */ ++ vpcmpb $0, %YMM0, %YMMZERO, %k0 ++ add $(VEC_SIZE * 4), %rax ++ ktestd %k0, %k0 ++ jz L(align_four_vec_loop) ++ ++ vpcmpb $0, -(VEC_SIZE * 4)(%rax), %YMMZERO, %k0 ++ sub $(VEC_SIZE * 5), %rax ++ kmovd %k0, %edx ++ test %edx, %edx ++ jnz L(exit_null_on_second_vector) ++ ++ vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1 ++ kmovd %k1, %edx ++ test %edx, %edx ++ jnz L(exit_null_on_third_vector) ++ ++ vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2 ++ kmovd %k2, %edx ++ test %edx, %edx ++ jnz L(exit_null_on_fourth_vector) ++ ++ vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3 ++ kmovd %k3, %edx ++ sub %rdi, %rax ++ bsf %rdx, %rdx ++ add %rdx, %rax ++ add $(VEC_SIZE * 4), %rax ++ jmp L(StartStrcpyPart) ++ ++ .p2align 4 ++L(exit): ++ sub %rdi, %rax ++L(exit_null_on_first_vector): ++ bsf %rdx, %rdx ++ add %rdx, %rax ++ jmp L(StartStrcpyPart) ++ ++ .p2align 4 ++L(exit_null_on_second_vector): ++ sub %rdi, %rax ++ bsf %rdx, %rdx ++ add %rdx, %rax ++ add $VEC_SIZE, %rax ++ jmp L(StartStrcpyPart) ++ ++ .p2align 4 ++L(exit_null_on_third_vector): ++ sub %rdi, %rax ++ bsf %rdx, %rdx ++ add %rdx, %rax ++ add $(VEC_SIZE * 2), %rax ++ jmp L(StartStrcpyPart) ++ ++ .p2align 4 ++L(exit_null_on_fourth_vector): ++ sub %rdi, %rax ++ bsf %rdx, %rdx ++ add %rdx, %rax ++ add 
$(VEC_SIZE * 3), %rax ++ jmp L(StartStrcpyPart) ++ ++ .p2align 4 ++L(exit_null_on_fifth_vector): ++ sub %rdi, %rax ++ bsf %rdx, %rdx ++ add %rdx, %rax ++ add $(VEC_SIZE * 4), %rax ++ ++ .p2align 4 ++L(StartStrcpyPart): ++ lea (%r9, %rax), %rdi ++ mov %rsi, %rcx ++ mov %r9, %rax /* save result */ ++ ++# ifdef USE_AS_STRNCAT ++ test %r8, %r8 ++ jz L(ExitZero) ++# define USE_AS_STRNCPY ++# endif ++ ++# include "strcpy-evex.S" ++#endif +diff --git a/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S +new file mode 100644 +index 0000000000..81f20d1d8e +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S +@@ -0,0 +1,12 @@ ++#ifndef STRCHR ++# define STRCHR __strchr_avx2_rtm ++#endif ++ ++#define ZERO_UPPER_VEC_REGISTERS_RETURN \ ++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST ++ ++#define VZEROUPPER_RETURN jmp L(return_vzeroupper) ++ ++#define SECTION(p) p##.avx.rtm ++ ++#include "strchr-avx2.S" +diff --git a/sysdeps/x86_64/multiarch/strchr-avx2.S b/sysdeps/x86_64/multiarch/strchr-avx2.S +index 39fc69da7b..0a5217514a 100644 +--- a/sysdeps/x86_64/multiarch/strchr-avx2.S ++++ b/sysdeps/x86_64/multiarch/strchr-avx2.S +@@ -38,9 +38,13 @@ + # define VZEROUPPER vzeroupper + # endif + ++# ifndef SECTION ++# define SECTION(p) p##.avx ++# endif ++ + # define VEC_SIZE 32 + +- .section .text.avx,"ax",@progbits ++ .section SECTION(.text),"ax",@progbits + ENTRY (STRCHR) + movl %edi, %ecx + /* Broadcast CHAR to YMM0. 
*/ +@@ -93,8 +97,8 @@ L(cros_page_boundary): + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax + # endif +- VZEROUPPER +- ret ++L(return_vzeroupper): ++ ZERO_UPPER_VEC_REGISTERS_RETURN + + .p2align 4 + L(aligned_more): +@@ -190,8 +194,7 @@ L(first_vec_x0): + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(first_vec_x1): +@@ -205,8 +208,7 @@ L(first_vec_x1): + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(first_vec_x2): +@@ -220,8 +222,7 @@ L(first_vec_x2): + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(4x_vec_end): +@@ -247,8 +248,7 @@ L(first_vec_x3): + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + END (STRCHR) + #endif +diff --git a/sysdeps/x86_64/multiarch/strchr-evex.S b/sysdeps/x86_64/multiarch/strchr-evex.S +new file mode 100644 +index 0000000000..ddc86a7058 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strchr-evex.S +@@ -0,0 +1,335 @@ ++/* strchr/strchrnul optimized with 256-bit EVEX instructions. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#if IS_IN (libc) ++ ++# include <sysdep.h> ++ ++# ifndef STRCHR ++# define STRCHR __strchr_evex ++# endif ++ ++# define VMOVU vmovdqu64 ++# define VMOVA vmovdqa64 ++ ++# ifdef USE_AS_WCSCHR ++# define VPBROADCAST vpbroadcastd ++# define VPCMP vpcmpd ++# define VPMINU vpminud ++# define CHAR_REG esi ++# define SHIFT_REG r8d ++# else ++# define VPBROADCAST vpbroadcastb ++# define VPCMP vpcmpb ++# define VPMINU vpminub ++# define CHAR_REG sil ++# define SHIFT_REG ecx ++# endif ++ ++# define XMMZERO xmm16 ++ ++# define YMMZERO ymm16 ++# define YMM0 ymm17 ++# define YMM1 ymm18 ++# define YMM2 ymm19 ++# define YMM3 ymm20 ++# define YMM4 ymm21 ++# define YMM5 ymm22 ++# define YMM6 ymm23 ++# define YMM7 ymm24 ++# define YMM8 ymm25 ++ ++# define VEC_SIZE 32 ++# define PAGE_SIZE 4096 ++ ++ .section .text.evex,"ax",@progbits ++ENTRY (STRCHR) ++ movl %edi, %ecx ++# ifndef USE_AS_STRCHRNUL ++ xorl %edx, %edx ++# endif ++ ++ /* Broadcast CHAR to YMM0. */ ++ VPBROADCAST %esi, %YMM0 ++ ++ vpxorq %XMMZERO, %XMMZERO, %XMMZERO ++ ++ /* Check if we cross page boundary with one vector load. */ ++ andl $(PAGE_SIZE - 1), %ecx ++ cmpl $(PAGE_SIZE - VEC_SIZE), %ecx ++ ja L(cross_page_boundary) ++ ++ /* Check the first VEC_SIZE bytes. Search for both CHAR and the ++ null bytes. */ ++ VMOVU (%rdi), %YMM1 ++ ++ /* Leaves only CHARS matching esi as 0. */ ++ vpxorq %YMM1, %YMM0, %YMM2 ++ VPMINU %YMM2, %YMM1, %YMM2 ++ /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ ++ VPCMP $0, %YMMZERO, %YMM2, %k0 ++ ktestd %k0, %k0 ++ jz L(more_vecs) ++ kmovd %k0, %eax ++ tzcntl %eax, %eax ++ /* Found CHAR or the null byte. */ ++# ifdef USE_AS_WCSCHR ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ ++ leaq (%rdi, %rax, 4), %rax ++# else ++ addq %rdi, %rax ++# endif ++# ifndef USE_AS_STRCHRNUL ++ cmp (%rax), %CHAR_REG ++ cmovne %rdx, %rax ++# endif ++ ret ++ ++ .p2align 4 ++L(more_vecs): ++ /* Align data for aligned loads in the loop. 
*/ ++ andq $-VEC_SIZE, %rdi ++L(aligned_more): ++ ++ /* Check the next 4 * VEC_SIZE. Only one VEC_SIZE at a time ++ since data is only aligned to VEC_SIZE. */ ++ VMOVA VEC_SIZE(%rdi), %YMM1 ++ addq $VEC_SIZE, %rdi ++ ++ /* Leaves only CHARS matching esi as 0. */ ++ vpxorq %YMM1, %YMM0, %YMM2 ++ VPMINU %YMM2, %YMM1, %YMM2 ++ /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ ++ VPCMP $0, %YMMZERO, %YMM2, %k0 ++ kmovd %k0, %eax ++ testl %eax, %eax ++ jnz L(first_vec_x0) ++ ++ VMOVA VEC_SIZE(%rdi), %YMM1 ++ /* Leaves only CHARS matching esi as 0. */ ++ vpxorq %YMM1, %YMM0, %YMM2 ++ VPMINU %YMM2, %YMM1, %YMM2 ++ /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ ++ VPCMP $0, %YMMZERO, %YMM2, %k0 ++ kmovd %k0, %eax ++ testl %eax, %eax ++ jnz L(first_vec_x1) ++ ++ VMOVA (VEC_SIZE * 2)(%rdi), %YMM1 ++ /* Leaves only CHARS matching esi as 0. */ ++ vpxorq %YMM1, %YMM0, %YMM2 ++ VPMINU %YMM2, %YMM1, %YMM2 ++ /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ ++ VPCMP $0, %YMMZERO, %YMM2, %k0 ++ kmovd %k0, %eax ++ testl %eax, %eax ++ jnz L(first_vec_x2) ++ ++ VMOVA (VEC_SIZE * 3)(%rdi), %YMM1 ++ /* Leaves only CHARS matching esi as 0. */ ++ vpxorq %YMM1, %YMM0, %YMM2 ++ VPMINU %YMM2, %YMM1, %YMM2 ++ /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ ++ VPCMP $0, %YMMZERO, %YMM2, %k0 ++ ktestd %k0, %k0 ++ jz L(prep_loop_4x) ++ ++ kmovd %k0, %eax ++ tzcntl %eax, %eax ++ /* Found CHAR or the null byte. */ ++# ifdef USE_AS_WCSCHR ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ ++ leaq (VEC_SIZE * 3)(%rdi, %rax, 4), %rax ++# else ++ leaq (VEC_SIZE * 3)(%rdi, %rax), %rax ++# endif ++# ifndef USE_AS_STRCHRNUL ++ cmp (%rax), %CHAR_REG ++ cmovne %rdx, %rax ++# endif ++ ret ++ ++ .p2align 4 ++L(first_vec_x0): ++ tzcntl %eax, %eax ++ /* Found CHAR or the null byte. */ ++# ifdef USE_AS_WCSCHR ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. 
*/ ++ leaq (%rdi, %rax, 4), %rax ++# else ++ addq %rdi, %rax ++# endif ++# ifndef USE_AS_STRCHRNUL ++ cmp (%rax), %CHAR_REG ++ cmovne %rdx, %rax ++# endif ++ ret ++ ++ .p2align 4 ++L(first_vec_x1): ++ tzcntl %eax, %eax ++ /* Found CHAR or the null byte. */ ++# ifdef USE_AS_WCSCHR ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ ++ leaq VEC_SIZE(%rdi, %rax, 4), %rax ++# else ++ leaq VEC_SIZE(%rdi, %rax), %rax ++# endif ++# ifndef USE_AS_STRCHRNUL ++ cmp (%rax), %CHAR_REG ++ cmovne %rdx, %rax ++# endif ++ ret ++ ++ .p2align 4 ++L(first_vec_x2): ++ tzcntl %eax, %eax ++ /* Found CHAR or the null byte. */ ++# ifdef USE_AS_WCSCHR ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ ++ leaq (VEC_SIZE * 2)(%rdi, %rax, 4), %rax ++# else ++ leaq (VEC_SIZE * 2)(%rdi, %rax), %rax ++# endif ++# ifndef USE_AS_STRCHRNUL ++ cmp (%rax), %CHAR_REG ++ cmovne %rdx, %rax ++# endif ++ ret ++ ++L(prep_loop_4x): ++ /* Align data to 4 * VEC_SIZE. */ ++ andq $-(VEC_SIZE * 4), %rdi ++ ++ .p2align 4 ++L(loop_4x_vec): ++ /* Compare 4 * VEC at a time forward. */ ++ VMOVA (VEC_SIZE * 4)(%rdi), %YMM1 ++ VMOVA (VEC_SIZE * 5)(%rdi), %YMM2 ++ VMOVA (VEC_SIZE * 6)(%rdi), %YMM3 ++ VMOVA (VEC_SIZE * 7)(%rdi), %YMM4 ++ ++ /* Leaves only CHARS matching esi as 0. */ ++ vpxorq %YMM1, %YMM0, %YMM5 ++ vpxorq %YMM2, %YMM0, %YMM6 ++ vpxorq %YMM3, %YMM0, %YMM7 ++ vpxorq %YMM4, %YMM0, %YMM8 ++ ++ VPMINU %YMM5, %YMM1, %YMM5 ++ VPMINU %YMM6, %YMM2, %YMM6 ++ VPMINU %YMM7, %YMM3, %YMM7 ++ VPMINU %YMM8, %YMM4, %YMM8 ++ ++ VPMINU %YMM5, %YMM6, %YMM1 ++ VPMINU %YMM7, %YMM8, %YMM2 ++ ++ VPMINU %YMM1, %YMM2, %YMM1 ++ ++ /* Each bit in K0 represents a CHAR or a null byte. */ ++ VPCMP $0, %YMMZERO, %YMM1, %k0 ++ ++ addq $(VEC_SIZE * 4), %rdi ++ ++ ktestd %k0, %k0 ++ jz L(loop_4x_vec) ++ ++ /* Each bit in K0 represents a CHAR or a null byte in YMM1. 
*/ ++ VPCMP $0, %YMMZERO, %YMM5, %k0 ++ kmovd %k0, %eax ++ testl %eax, %eax ++ jnz L(first_vec_x0) ++ ++ /* Each bit in K1 represents a CHAR or a null byte in YMM2. */ ++ VPCMP $0, %YMMZERO, %YMM6, %k1 ++ kmovd %k1, %eax ++ testl %eax, %eax ++ jnz L(first_vec_x1) ++ ++ /* Each bit in K2 represents a CHAR or a null byte in YMM3. */ ++ VPCMP $0, %YMMZERO, %YMM7, %k2 ++ /* Each bit in K3 represents a CHAR or a null byte in YMM4. */ ++ VPCMP $0, %YMMZERO, %YMM8, %k3 ++ ++# ifdef USE_AS_WCSCHR ++ /* NB: Each bit in K2/K3 represents 4-byte element. */ ++ kshiftlw $8, %k3, %k1 ++# else ++ kshiftlq $32, %k3, %k1 ++# endif ++ ++ /* Each bit in K1 represents a NULL or a mismatch. */ ++ korq %k1, %k2, %k1 ++ kmovq %k1, %rax ++ ++ tzcntq %rax, %rax ++# ifdef USE_AS_WCSCHR ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ ++ leaq (VEC_SIZE * 2)(%rdi, %rax, 4), %rax ++# else ++ leaq (VEC_SIZE * 2)(%rdi, %rax), %rax ++# endif ++# ifndef USE_AS_STRCHRNUL ++ cmp (%rax), %CHAR_REG ++ cmovne %rdx, %rax ++# endif ++ ret ++ ++ /* Cold case for crossing page with first load. */ ++ .p2align 4 ++L(cross_page_boundary): ++ andq $-VEC_SIZE, %rdi ++ andl $(VEC_SIZE - 1), %ecx ++ ++ VMOVA (%rdi), %YMM1 ++ ++ /* Leaves only CHARS matching esi as 0. */ ++ vpxorq %YMM1, %YMM0, %YMM2 ++ VPMINU %YMM2, %YMM1, %YMM2 ++ /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ ++ VPCMP $0, %YMMZERO, %YMM2, %k0 ++ kmovd %k0, %eax ++ testl %eax, %eax ++ ++# ifdef USE_AS_WCSCHR ++ /* NB: Divide shift count by 4 since each bit in K1 represent 4 ++ bytes. */ ++ movl %ecx, %SHIFT_REG ++ sarl $2, %SHIFT_REG ++# endif ++ ++ /* Remove the leading bits. */ ++ sarxl %SHIFT_REG, %eax, %eax ++ testl %eax, %eax ++ ++ jz L(aligned_more) ++ tzcntl %eax, %eax ++ addq %rcx, %rdi ++# ifdef USE_AS_WCSCHR ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. 
*/ ++ leaq (%rdi, %rax, 4), %rax ++# else ++ addq %rdi, %rax ++# endif ++# ifndef USE_AS_STRCHRNUL ++ cmp (%rax), %CHAR_REG ++ cmovne %rdx, %rax ++# endif ++ ret ++ ++END (STRCHR) ++# endif +diff --git a/sysdeps/x86_64/multiarch/strchr.c b/sysdeps/x86_64/multiarch/strchr.c +index 8df4609bf8..4ed1177c70 100644 +--- a/sysdeps/x86_64/multiarch/strchr.c ++++ b/sysdeps/x86_64/multiarch/strchr.c +@@ -29,16 +29,28 @@ + extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + + static inline void * + IFUNC_SELECTOR (void) + { + const struct cpu_features* cpu_features = __get_cpu_features (); + +- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) +- && CPU_FEATURE_USABLE_P (cpu_features, AVX2) ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) + && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) +- return OPTIMIZE (avx2); ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) ++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) ++ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)) ++ return OPTIMIZE (evex); ++ ++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) ++ return OPTIMIZE (avx2_rtm); ++ ++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) ++ return OPTIMIZE (avx2); ++ } + + if (CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF)) + return OPTIMIZE (sse2_no_bsf); +diff --git a/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S b/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S +new file mode 100644 +index 0000000000..cdcf818b91 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S +@@ -0,0 +1,3 @@ ++#define STRCHR __strchrnul_avx2_rtm ++#define USE_AS_STRCHRNUL 1 ++#include "strchr-avx2-rtm.S" +diff --git a/sysdeps/x86_64/multiarch/strchrnul-evex.S 
b/sysdeps/x86_64/multiarch/strchrnul-evex.S +new file mode 100644 +index 0000000000..064fe7ca9e +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strchrnul-evex.S +@@ -0,0 +1,3 @@ ++#define STRCHR __strchrnul_evex ++#define USE_AS_STRCHRNUL 1 ++#include "strchr-evex.S" +diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S +new file mode 100644 +index 0000000000..aecd30d97f +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S +@@ -0,0 +1,12 @@ ++#ifndef STRCMP ++# define STRCMP __strcmp_avx2_rtm ++#endif ++ ++#define ZERO_UPPER_VEC_REGISTERS_RETURN \ ++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST ++ ++#define VZEROUPPER_RETURN jmp L(return_vzeroupper) ++ ++#define SECTION(p) p##.avx.rtm ++ ++#include "strcmp-avx2.S" +diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S +index d42b04b54f..759e5b64c2 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S ++++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S +@@ -55,6 +55,10 @@ + # define VZEROUPPER vzeroupper + # endif + ++# ifndef SECTION ++# define SECTION(p) p##.avx ++# endif ++ + /* Warning! + wcscmp/wcsncmp have to use SIGNED comparison for elements. + strcmp/strncmp have to use UNSIGNED comparison for elements. +@@ -75,7 +79,7 @@ + the maximum offset is reached before a difference is found, zero is + returned. */ + +- .section .text.avx,"ax",@progbits ++ .section SECTION(.text),"ax",@progbits + ENTRY (STRCMP) + # ifdef USE_AS_STRNCMP + /* Check for simple cases (0 or 1) in offset. */ +@@ -83,6 +87,16 @@ ENTRY (STRCMP) + je L(char0) + jb L(zero) + # ifdef USE_AS_WCSCMP ++# ifndef __ILP32__ ++ movq %rdx, %rcx ++ /* Check if length could overflow when multiplied by ++ sizeof(wchar_t). Checking top 8 bits will cover all potential ++ overflow cases as well as redirect cases where its impossible to ++ length to bound a valid memory region. In these cases just use ++ 'wcscmp'. 
*/ ++ shrq $56, %rcx ++ jnz __wcscmp_avx2 ++# endif + /* Convert units: from wide to byte char. */ + shl $2, %RDX_LP + # endif +@@ -127,8 +141,8 @@ L(return): + movzbl (%rsi, %rdx), %edx + subl %edx, %eax + # endif +- VZEROUPPER +- ret ++L(return_vzeroupper): ++ ZERO_UPPER_VEC_REGISTERS_RETURN + + .p2align 4 + L(return_vec_size): +@@ -161,8 +175,7 @@ L(return_vec_size): + subl %edx, %eax + # endif + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(return_2_vec_size): +@@ -195,8 +208,7 @@ L(return_2_vec_size): + subl %edx, %eax + # endif + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(return_3_vec_size): +@@ -229,8 +241,7 @@ L(return_3_vec_size): + subl %edx, %eax + # endif + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(next_3_vectors): +@@ -356,8 +367,7 @@ L(back_to_loop): + subl %edx, %eax + # endif + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(test_vec): +@@ -400,8 +410,7 @@ L(test_vec): + subl %edx, %eax + # endif + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(test_2_vec): +@@ -444,8 +453,7 @@ L(test_2_vec): + subl %edx, %eax + # endif + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(test_3_vec): +@@ -486,8 +494,7 @@ L(test_3_vec): + subl %edx, %eax + # endif + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(loop_cross_page): +@@ -556,8 +563,7 @@ L(loop_cross_page): + subl %edx, %eax + # endif + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(loop_cross_page_2_vec): +@@ -631,8 +637,7 @@ L(loop_cross_page_2_vec): + subl %edx, %eax + # endif + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + # ifdef USE_AS_STRNCMP + L(string_nbyte_offset_check): +@@ -674,8 +679,7 @@ L(cross_page_loop): + # ifndef USE_AS_WCSCMP + L(different): + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + # ifdef USE_AS_WCSCMP + .p2align 4 +@@ -685,16 +689,14 @@ L(different): + setl %al + negl %eax + orl 
$1, %eax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + # endif + + # ifdef USE_AS_STRNCMP + .p2align 4 + L(zero): + xorl %eax, %eax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(char0): +@@ -708,8 +710,7 @@ L(char0): + movzbl (%rdi), %eax + subl %ecx, %eax + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + # endif + + .p2align 4 +@@ -734,8 +735,7 @@ L(last_vector): + movzbl (%rsi, %rdx), %edx + subl %edx, %eax + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + /* Comparing on page boundary region requires special treatment: + It must done one vector at the time, starting with the wider +@@ -856,7 +856,6 @@ L(cross_page_4bytes): + testl %eax, %eax + jne L(cross_page_loop) + subl %ecx, %eax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + END (STRCMP) + #endif +diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S +new file mode 100644 +index 0000000000..459eeed09f +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strcmp-evex.S +@@ -0,0 +1,1043 @@ ++/* strcmp/wcscmp/strncmp/wcsncmp optimized with 256-bit EVEX instructions. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#if IS_IN (libc) ++ ++# include <sysdep.h> ++ ++# ifndef STRCMP ++# define STRCMP __strcmp_evex ++# endif ++ ++# define PAGE_SIZE 4096 ++ ++/* VEC_SIZE = Number of bytes in a ymm register */ ++# define VEC_SIZE 32 ++ ++/* Shift for dividing by (VEC_SIZE * 4). */ ++# define DIVIDE_BY_VEC_4_SHIFT 7 ++# if (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT) ++# error (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT) ++# endif ++ ++# define VMOVU vmovdqu64 ++# define VMOVA vmovdqa64 ++ ++# ifdef USE_AS_WCSCMP ++/* Compare packed dwords. */ ++# define VPCMP vpcmpd ++# define SHIFT_REG32 r8d ++# define SHIFT_REG64 r8 ++/* 1 dword char == 4 bytes. */ ++# define SIZE_OF_CHAR 4 ++# else ++/* Compare packed bytes. */ ++# define VPCMP vpcmpb ++# define SHIFT_REG32 ecx ++# define SHIFT_REG64 rcx ++/* 1 byte char == 1 byte. */ ++# define SIZE_OF_CHAR 1 ++# endif ++ ++# define XMMZERO xmm16 ++# define XMM0 xmm17 ++# define XMM1 xmm18 ++ ++# define YMMZERO ymm16 ++# define YMM0 ymm17 ++# define YMM1 ymm18 ++# define YMM2 ymm19 ++# define YMM3 ymm20 ++# define YMM4 ymm21 ++# define YMM5 ymm22 ++# define YMM6 ymm23 ++# define YMM7 ymm24 ++ ++/* Warning! ++ wcscmp/wcsncmp have to use SIGNED comparison for elements. ++ strcmp/strncmp have to use UNSIGNED comparison for elements. ++*/ ++ ++/* The main idea of the string comparison (byte or dword) using 256-bit ++ EVEX instructions consists of comparing (VPCMP) two ymm vectors. The ++ latter can be on either packed bytes or dwords depending on ++ USE_AS_WCSCMP. In order to check the null char, algorithm keeps the ++ matched bytes/dwords, requiring 5 EVEX instructions (3 VPCMP and 2 ++ KORD). In general, the costs of comparing VEC_SIZE bytes (32-bytes) ++ are 3 VPCMP and 2 KORD instructions, together with VMOVU and ktestd ++ instructions. Main loop (away from from page boundary) compares 4 ++ vectors are a time, effectively comparing 4 x VEC_SIZE bytes (128 ++ bytes) on each loop. 
++ ++ The routine strncmp/wcsncmp (enabled by defining USE_AS_STRNCMP) logic ++ is the same as strcmp, except that an a maximum offset is tracked. If ++ the maximum offset is reached before a difference is found, zero is ++ returned. */ ++ ++ .section .text.evex,"ax",@progbits ++ENTRY (STRCMP) ++# ifdef USE_AS_STRNCMP ++ /* Check for simple cases (0 or 1) in offset. */ ++ cmp $1, %RDX_LP ++ je L(char0) ++ jb L(zero) ++# ifdef USE_AS_WCSCMP ++ /* Convert units: from wide to byte char. */ ++ shl $2, %RDX_LP ++# endif ++ /* Register %r11 tracks the maximum offset. */ ++ mov %RDX_LP, %R11_LP ++# endif ++ movl %edi, %eax ++ xorl %edx, %edx ++ /* Make %XMMZERO (%YMMZERO) all zeros in this function. */ ++ vpxorq %XMMZERO, %XMMZERO, %XMMZERO ++ orl %esi, %eax ++ andl $(PAGE_SIZE - 1), %eax ++ cmpl $(PAGE_SIZE - (VEC_SIZE * 4)), %eax ++ jg L(cross_page) ++ /* Start comparing 4 vectors. */ ++ VMOVU (%rdi), %YMM0 ++ VMOVU (%rsi), %YMM1 ++ ++ /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */ ++ VPCMP $4, %YMM0, %YMM1, %k0 ++ ++ /* Check for NULL in YMM0. */ ++ VPCMP $0, %YMMZERO, %YMM0, %k1 ++ /* Check for NULL in YMM1. */ ++ VPCMP $0, %YMMZERO, %YMM1, %k2 ++ /* Each bit in K1 represents a NULL in YMM0 or YMM1. */ ++ kord %k1, %k2, %k1 ++ ++ /* Each bit in K1 represents: ++ 1. A mismatch in YMM0 and YMM1. Or ++ 2. A NULL in YMM0 or YMM1. ++ */ ++ kord %k0, %k1, %k1 ++ ++ ktestd %k1, %k1 ++ je L(next_3_vectors) ++ kmovd %k1, %ecx ++ tzcntl %ecx, %edx ++# ifdef USE_AS_WCSCMP ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ ++ sall $2, %edx ++# endif ++# ifdef USE_AS_STRNCMP ++ /* Return 0 if the mismatched index (%rdx) is after the maximum ++ offset (%r11). 
*/ ++ cmpq %r11, %rdx ++ jae L(zero) ++# endif ++# ifdef USE_AS_WCSCMP ++ xorl %eax, %eax ++ movl (%rdi, %rdx), %ecx ++ cmpl (%rsi, %rdx), %ecx ++ je L(return) ++L(wcscmp_return): ++ setl %al ++ negl %eax ++ orl $1, %eax ++L(return): ++# else ++ movzbl (%rdi, %rdx), %eax ++ movzbl (%rsi, %rdx), %edx ++ subl %edx, %eax ++# endif ++ ret ++ ++ .p2align 4 ++L(return_vec_size): ++ kmovd %k1, %ecx ++ tzcntl %ecx, %edx ++# ifdef USE_AS_WCSCMP ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ ++ sall $2, %edx ++# endif ++# ifdef USE_AS_STRNCMP ++ /* Return 0 if the mismatched index (%rdx + VEC_SIZE) is after ++ the maximum offset (%r11). */ ++ addq $VEC_SIZE, %rdx ++ cmpq %r11, %rdx ++ jae L(zero) ++# ifdef USE_AS_WCSCMP ++ xorl %eax, %eax ++ movl (%rdi, %rdx), %ecx ++ cmpl (%rsi, %rdx), %ecx ++ jne L(wcscmp_return) ++# else ++ movzbl (%rdi, %rdx), %eax ++ movzbl (%rsi, %rdx), %edx ++ subl %edx, %eax ++# endif ++# else ++# ifdef USE_AS_WCSCMP ++ xorl %eax, %eax ++ movl VEC_SIZE(%rdi, %rdx), %ecx ++ cmpl VEC_SIZE(%rsi, %rdx), %ecx ++ jne L(wcscmp_return) ++# else ++ movzbl VEC_SIZE(%rdi, %rdx), %eax ++ movzbl VEC_SIZE(%rsi, %rdx), %edx ++ subl %edx, %eax ++# endif ++# endif ++ ret ++ ++ .p2align 4 ++L(return_2_vec_size): ++ kmovd %k1, %ecx ++ tzcntl %ecx, %edx ++# ifdef USE_AS_WCSCMP ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ ++ sall $2, %edx ++# endif ++# ifdef USE_AS_STRNCMP ++ /* Return 0 if the mismatched index (%rdx + 2 * VEC_SIZE) is ++ after the maximum offset (%r11). 
*/ ++ addq $(VEC_SIZE * 2), %rdx ++ cmpq %r11, %rdx ++ jae L(zero) ++# ifdef USE_AS_WCSCMP ++ xorl %eax, %eax ++ movl (%rdi, %rdx), %ecx ++ cmpl (%rsi, %rdx), %ecx ++ jne L(wcscmp_return) ++# else ++ movzbl (%rdi, %rdx), %eax ++ movzbl (%rsi, %rdx), %edx ++ subl %edx, %eax ++# endif ++# else ++# ifdef USE_AS_WCSCMP ++ xorl %eax, %eax ++ movl (VEC_SIZE * 2)(%rdi, %rdx), %ecx ++ cmpl (VEC_SIZE * 2)(%rsi, %rdx), %ecx ++ jne L(wcscmp_return) ++# else ++ movzbl (VEC_SIZE * 2)(%rdi, %rdx), %eax ++ movzbl (VEC_SIZE * 2)(%rsi, %rdx), %edx ++ subl %edx, %eax ++# endif ++# endif ++ ret ++ ++ .p2align 4 ++L(return_3_vec_size): ++ kmovd %k1, %ecx ++ tzcntl %ecx, %edx ++# ifdef USE_AS_WCSCMP ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ ++ sall $2, %edx ++# endif ++# ifdef USE_AS_STRNCMP ++ /* Return 0 if the mismatched index (%rdx + 3 * VEC_SIZE) is ++ after the maximum offset (%r11). */ ++ addq $(VEC_SIZE * 3), %rdx ++ cmpq %r11, %rdx ++ jae L(zero) ++# ifdef USE_AS_WCSCMP ++ xorl %eax, %eax ++ movl (%rdi, %rdx), %ecx ++ cmpl (%rsi, %rdx), %ecx ++ jne L(wcscmp_return) ++# else ++ movzbl (%rdi, %rdx), %eax ++ movzbl (%rsi, %rdx), %edx ++ subl %edx, %eax ++# endif ++# else ++# ifdef USE_AS_WCSCMP ++ xorl %eax, %eax ++ movl (VEC_SIZE * 3)(%rdi, %rdx), %ecx ++ cmpl (VEC_SIZE * 3)(%rsi, %rdx), %ecx ++ jne L(wcscmp_return) ++# else ++ movzbl (VEC_SIZE * 3)(%rdi, %rdx), %eax ++ movzbl (VEC_SIZE * 3)(%rsi, %rdx), %edx ++ subl %edx, %eax ++# endif ++# endif ++ ret ++ ++ .p2align 4 ++L(next_3_vectors): ++ VMOVU VEC_SIZE(%rdi), %YMM0 ++ VMOVU VEC_SIZE(%rsi), %YMM1 ++ /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */ ++ VPCMP $4, %YMM0, %YMM1, %k0 ++ VPCMP $0, %YMMZERO, %YMM0, %k1 ++ VPCMP $0, %YMMZERO, %YMM1, %k2 ++ /* Each bit in K1 represents a NULL in YMM0 or YMM1. */ ++ kord %k1, %k2, %k1 ++ /* Each bit in K1 represents a NULL or a mismatch. 
*/ ++ kord %k0, %k1, %k1 ++ ktestd %k1, %k1 ++ jne L(return_vec_size) ++ ++ VMOVU (VEC_SIZE * 2)(%rdi), %YMM2 ++ VMOVU (VEC_SIZE * 3)(%rdi), %YMM3 ++ VMOVU (VEC_SIZE * 2)(%rsi), %YMM4 ++ VMOVU (VEC_SIZE * 3)(%rsi), %YMM5 ++ ++ /* Each bit in K0 represents a mismatch in YMM2 and YMM4. */ ++ VPCMP $4, %YMM2, %YMM4, %k0 ++ VPCMP $0, %YMMZERO, %YMM2, %k1 ++ VPCMP $0, %YMMZERO, %YMM4, %k2 ++ /* Each bit in K1 represents a NULL in YMM2 or YMM4. */ ++ kord %k1, %k2, %k1 ++ /* Each bit in K1 represents a NULL or a mismatch. */ ++ kord %k0, %k1, %k1 ++ ktestd %k1, %k1 ++ jne L(return_2_vec_size) ++ ++ /* Each bit in K0 represents a mismatch in YMM3 and YMM5. */ ++ VPCMP $4, %YMM3, %YMM5, %k0 ++ VPCMP $0, %YMMZERO, %YMM3, %k1 ++ VPCMP $0, %YMMZERO, %YMM5, %k2 ++ /* Each bit in K1 represents a NULL in YMM3 or YMM5. */ ++ kord %k1, %k2, %k1 ++ /* Each bit in K1 represents a NULL or a mismatch. */ ++ kord %k0, %k1, %k1 ++ ktestd %k1, %k1 ++ jne L(return_3_vec_size) ++L(main_loop_header): ++ leaq (VEC_SIZE * 4)(%rdi), %rdx ++ movl $PAGE_SIZE, %ecx ++ /* Align load via RAX. */ ++ andq $-(VEC_SIZE * 4), %rdx ++ subq %rdi, %rdx ++ leaq (%rdi, %rdx), %rax ++# ifdef USE_AS_STRNCMP ++ /* Starting from this point, the maximum offset, or simply the ++ 'offset', DECREASES by the same amount when base pointers are ++ moved forward. Return 0 when: ++ 1) On match: offset <= the matched vector index. ++ 2) On mistmach, offset is before the mistmatched index. ++ */ ++ subq %rdx, %r11 ++ jbe L(zero) ++# endif ++ addq %rsi, %rdx ++ movq %rdx, %rsi ++ andl $(PAGE_SIZE - 1), %esi ++ /* Number of bytes before page crossing. */ ++ subq %rsi, %rcx ++ /* Number of VEC_SIZE * 4 blocks before page crossing. */ ++ shrq $DIVIDE_BY_VEC_4_SHIFT, %rcx ++ /* ESI: Number of VEC_SIZE * 4 blocks before page crossing. */ ++ movl %ecx, %esi ++ jmp L(loop_start) ++ ++ .p2align 4 ++L(loop): ++# ifdef USE_AS_STRNCMP ++ /* Base pointers are moved forward by 4 * VEC_SIZE. 
Decrease ++ the maximum offset (%r11) by the same amount. */ ++ subq $(VEC_SIZE * 4), %r11 ++ jbe L(zero) ++# endif ++ addq $(VEC_SIZE * 4), %rax ++ addq $(VEC_SIZE * 4), %rdx ++L(loop_start): ++ testl %esi, %esi ++ leal -1(%esi), %esi ++ je L(loop_cross_page) ++L(back_to_loop): ++ /* Main loop, comparing 4 vectors are a time. */ ++ VMOVA (%rax), %YMM0 ++ VMOVA VEC_SIZE(%rax), %YMM2 ++ VMOVA (VEC_SIZE * 2)(%rax), %YMM4 ++ VMOVA (VEC_SIZE * 3)(%rax), %YMM6 ++ VMOVU (%rdx), %YMM1 ++ VMOVU VEC_SIZE(%rdx), %YMM3 ++ VMOVU (VEC_SIZE * 2)(%rdx), %YMM5 ++ VMOVU (VEC_SIZE * 3)(%rdx), %YMM7 ++ ++ VPCMP $4, %YMM0, %YMM1, %k0 ++ VPCMP $0, %YMMZERO, %YMM0, %k1 ++ VPCMP $0, %YMMZERO, %YMM1, %k2 ++ kord %k1, %k2, %k1 ++ /* Each bit in K4 represents a NULL or a mismatch in YMM0 and ++ YMM1. */ ++ kord %k0, %k1, %k4 ++ ++ VPCMP $4, %YMM2, %YMM3, %k0 ++ VPCMP $0, %YMMZERO, %YMM2, %k1 ++ VPCMP $0, %YMMZERO, %YMM3, %k2 ++ kord %k1, %k2, %k1 ++ /* Each bit in K5 represents a NULL or a mismatch in YMM2 and ++ YMM3. */ ++ kord %k0, %k1, %k5 ++ ++ VPCMP $4, %YMM4, %YMM5, %k0 ++ VPCMP $0, %YMMZERO, %YMM4, %k1 ++ VPCMP $0, %YMMZERO, %YMM5, %k2 ++ kord %k1, %k2, %k1 ++ /* Each bit in K6 represents a NULL or a mismatch in YMM4 and ++ YMM5. */ ++ kord %k0, %k1, %k6 ++ ++ VPCMP $4, %YMM6, %YMM7, %k0 ++ VPCMP $0, %YMMZERO, %YMM6, %k1 ++ VPCMP $0, %YMMZERO, %YMM7, %k2 ++ kord %k1, %k2, %k1 ++ /* Each bit in K7 represents a NULL or a mismatch in YMM6 and ++ YMM7. */ ++ kord %k0, %k1, %k7 ++ ++ kord %k4, %k5, %k0 ++ kord %k6, %k7, %k1 ++ ++ /* Test each mask (32 bits) individually because for VEC_SIZE ++ == 32 is not possible to OR the four masks and keep all bits ++ in a 64-bit integer register, differing from SSE2 strcmp ++ where ORing is possible. */ ++ kortestd %k0, %k1 ++ je L(loop) ++ ktestd %k4, %k4 ++ je L(test_vec) ++ kmovd %k4, %edi ++ tzcntl %edi, %ecx ++# ifdef USE_AS_WCSCMP ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. 
*/ ++ sall $2, %ecx ++# endif ++# ifdef USE_AS_STRNCMP ++ cmpq %rcx, %r11 ++ jbe L(zero) ++# ifdef USE_AS_WCSCMP ++ movq %rax, %rsi ++ xorl %eax, %eax ++ movl (%rsi, %rcx), %edi ++ cmpl (%rdx, %rcx), %edi ++ jne L(wcscmp_return) ++# else ++ movzbl (%rax, %rcx), %eax ++ movzbl (%rdx, %rcx), %edx ++ subl %edx, %eax ++# endif ++# else ++# ifdef USE_AS_WCSCMP ++ movq %rax, %rsi ++ xorl %eax, %eax ++ movl (%rsi, %rcx), %edi ++ cmpl (%rdx, %rcx), %edi ++ jne L(wcscmp_return) ++# else ++ movzbl (%rax, %rcx), %eax ++ movzbl (%rdx, %rcx), %edx ++ subl %edx, %eax ++# endif ++# endif ++ ret ++ ++ .p2align 4 ++L(test_vec): ++# ifdef USE_AS_STRNCMP ++ /* The first vector matched. Return 0 if the maximum offset ++ (%r11) <= VEC_SIZE. */ ++ cmpq $VEC_SIZE, %r11 ++ jbe L(zero) ++# endif ++ ktestd %k5, %k5 ++ je L(test_2_vec) ++ kmovd %k5, %ecx ++ tzcntl %ecx, %edi ++# ifdef USE_AS_WCSCMP ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ ++ sall $2, %edi ++# endif ++# ifdef USE_AS_STRNCMP ++ addq $VEC_SIZE, %rdi ++ cmpq %rdi, %r11 ++ jbe L(zero) ++# ifdef USE_AS_WCSCMP ++ movq %rax, %rsi ++ xorl %eax, %eax ++ movl (%rsi, %rdi), %ecx ++ cmpl (%rdx, %rdi), %ecx ++ jne L(wcscmp_return) ++# else ++ movzbl (%rax, %rdi), %eax ++ movzbl (%rdx, %rdi), %edx ++ subl %edx, %eax ++# endif ++# else ++# ifdef USE_AS_WCSCMP ++ movq %rax, %rsi ++ xorl %eax, %eax ++ movl VEC_SIZE(%rsi, %rdi), %ecx ++ cmpl VEC_SIZE(%rdx, %rdi), %ecx ++ jne L(wcscmp_return) ++# else ++ movzbl VEC_SIZE(%rax, %rdi), %eax ++ movzbl VEC_SIZE(%rdx, %rdi), %edx ++ subl %edx, %eax ++# endif ++# endif ++ ret ++ ++ .p2align 4 ++L(test_2_vec): ++# ifdef USE_AS_STRNCMP ++ /* The first 2 vectors matched. Return 0 if the maximum offset ++ (%r11) <= 2 * VEC_SIZE. */ ++ cmpq $(VEC_SIZE * 2), %r11 ++ jbe L(zero) ++# endif ++ ktestd %k6, %k6 ++ je L(test_3_vec) ++ kmovd %k6, %ecx ++ tzcntl %ecx, %edi ++# ifdef USE_AS_WCSCMP ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. 
*/ ++ sall $2, %edi ++# endif ++# ifdef USE_AS_STRNCMP ++ addq $(VEC_SIZE * 2), %rdi ++ cmpq %rdi, %r11 ++ jbe L(zero) ++# ifdef USE_AS_WCSCMP ++ movq %rax, %rsi ++ xorl %eax, %eax ++ movl (%rsi, %rdi), %ecx ++ cmpl (%rdx, %rdi), %ecx ++ jne L(wcscmp_return) ++# else ++ movzbl (%rax, %rdi), %eax ++ movzbl (%rdx, %rdi), %edx ++ subl %edx, %eax ++# endif ++# else ++# ifdef USE_AS_WCSCMP ++ movq %rax, %rsi ++ xorl %eax, %eax ++ movl (VEC_SIZE * 2)(%rsi, %rdi), %ecx ++ cmpl (VEC_SIZE * 2)(%rdx, %rdi), %ecx ++ jne L(wcscmp_return) ++# else ++ movzbl (VEC_SIZE * 2)(%rax, %rdi), %eax ++ movzbl (VEC_SIZE * 2)(%rdx, %rdi), %edx ++ subl %edx, %eax ++# endif ++# endif ++ ret ++ ++ .p2align 4 ++L(test_3_vec): ++# ifdef USE_AS_STRNCMP ++ /* The first 3 vectors matched. Return 0 if the maximum offset ++ (%r11) <= 3 * VEC_SIZE. */ ++ cmpq $(VEC_SIZE * 3), %r11 ++ jbe L(zero) ++# endif ++ kmovd %k7, %esi ++ tzcntl %esi, %ecx ++# ifdef USE_AS_WCSCMP ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ ++ sall $2, %ecx ++# endif ++# ifdef USE_AS_STRNCMP ++ addq $(VEC_SIZE * 3), %rcx ++ cmpq %rcx, %r11 ++ jbe L(zero) ++# ifdef USE_AS_WCSCMP ++ movq %rax, %rsi ++ xorl %eax, %eax ++ movl (%rsi, %rcx), %esi ++ cmpl (%rdx, %rcx), %esi ++ jne L(wcscmp_return) ++# else ++ movzbl (%rax, %rcx), %eax ++ movzbl (%rdx, %rcx), %edx ++ subl %edx, %eax ++# endif ++# else ++# ifdef USE_AS_WCSCMP ++ movq %rax, %rsi ++ xorl %eax, %eax ++ movl (VEC_SIZE * 3)(%rsi, %rcx), %esi ++ cmpl (VEC_SIZE * 3)(%rdx, %rcx), %esi ++ jne L(wcscmp_return) ++# else ++ movzbl (VEC_SIZE * 3)(%rax, %rcx), %eax ++ movzbl (VEC_SIZE * 3)(%rdx, %rcx), %edx ++ subl %edx, %eax ++# endif ++# endif ++ ret ++ ++ .p2align 4 ++L(loop_cross_page): ++ xorl %r10d, %r10d ++ movq %rdx, %rcx ++ /* Align load via RDX. We load the extra ECX bytes which should ++ be ignored. */ ++ andl $((VEC_SIZE * 4) - 1), %ecx ++ /* R10 is -RCX. */ ++ subq %rcx, %r10 ++ ++ /* This works only if VEC_SIZE * 2 == 64. 
*/ ++# if (VEC_SIZE * 2) != 64 ++# error (VEC_SIZE * 2) != 64 ++# endif ++ ++ /* Check if the first VEC_SIZE * 2 bytes should be ignored. */ ++ cmpl $(VEC_SIZE * 2), %ecx ++ jge L(loop_cross_page_2_vec) ++ ++ VMOVU (%rax, %r10), %YMM2 ++ VMOVU VEC_SIZE(%rax, %r10), %YMM3 ++ VMOVU (%rdx, %r10), %YMM4 ++ VMOVU VEC_SIZE(%rdx, %r10), %YMM5 ++ ++ VPCMP $4, %YMM4, %YMM2, %k0 ++ VPCMP $0, %YMMZERO, %YMM2, %k1 ++ VPCMP $0, %YMMZERO, %YMM4, %k2 ++ kord %k1, %k2, %k1 ++ /* Each bit in K1 represents a NULL or a mismatch in YMM2 and ++ YMM4. */ ++ kord %k0, %k1, %k1 ++ ++ VPCMP $4, %YMM5, %YMM3, %k3 ++ VPCMP $0, %YMMZERO, %YMM3, %k4 ++ VPCMP $0, %YMMZERO, %YMM5, %k5 ++ kord %k4, %k5, %k4 ++ /* Each bit in K3 represents a NULL or a mismatch in YMM3 and ++ YMM5. */ ++ kord %k3, %k4, %k3 ++ ++# ifdef USE_AS_WCSCMP ++ /* NB: Each bit in K1/K3 represents 4-byte element. */ ++ kshiftlw $8, %k3, %k2 ++ /* NB: Divide shift count by 4 since each bit in K1 represent 4 ++ bytes. */ ++ movl %ecx, %SHIFT_REG32 ++ sarl $2, %SHIFT_REG32 ++# else ++ kshiftlq $32, %k3, %k2 ++# endif ++ ++ /* Each bit in K1 represents a NULL or a mismatch. */ ++ korq %k1, %k2, %k1 ++ kmovq %k1, %rdi ++ ++ /* Since ECX < VEC_SIZE * 2, simply skip the first ECX bytes. */ ++ shrxq %SHIFT_REG64, %rdi, %rdi ++ testq %rdi, %rdi ++ je L(loop_cross_page_2_vec) ++ tzcntq %rdi, %rcx ++# ifdef USE_AS_WCSCMP ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. 
*/ ++ sall $2, %ecx ++# endif ++# ifdef USE_AS_STRNCMP ++ cmpq %rcx, %r11 ++ jbe L(zero) ++# ifdef USE_AS_WCSCMP ++ movq %rax, %rsi ++ xorl %eax, %eax ++ movl (%rsi, %rcx), %edi ++ cmpl (%rdx, %rcx), %edi ++ jne L(wcscmp_return) ++# else ++ movzbl (%rax, %rcx), %eax ++ movzbl (%rdx, %rcx), %edx ++ subl %edx, %eax ++# endif ++# else ++# ifdef USE_AS_WCSCMP ++ movq %rax, %rsi ++ xorl %eax, %eax ++ movl (%rsi, %rcx), %edi ++ cmpl (%rdx, %rcx), %edi ++ jne L(wcscmp_return) ++# else ++ movzbl (%rax, %rcx), %eax ++ movzbl (%rdx, %rcx), %edx ++ subl %edx, %eax ++# endif ++# endif ++ ret ++ ++ .p2align 4 ++L(loop_cross_page_2_vec): ++ /* The first VEC_SIZE * 2 bytes match or are ignored. */ ++ VMOVU (VEC_SIZE * 2)(%rax, %r10), %YMM0 ++ VMOVU (VEC_SIZE * 3)(%rax, %r10), %YMM1 ++ VMOVU (VEC_SIZE * 2)(%rdx, %r10), %YMM2 ++ VMOVU (VEC_SIZE * 3)(%rdx, %r10), %YMM3 ++ ++ VPCMP $4, %YMM0, %YMM2, %k0 ++ VPCMP $0, %YMMZERO, %YMM0, %k1 ++ VPCMP $0, %YMMZERO, %YMM2, %k2 ++ kord %k1, %k2, %k1 ++ /* Each bit in K1 represents a NULL or a mismatch in YMM0 and ++ YMM2. */ ++ kord %k0, %k1, %k1 ++ ++ VPCMP $4, %YMM1, %YMM3, %k3 ++ VPCMP $0, %YMMZERO, %YMM1, %k4 ++ VPCMP $0, %YMMZERO, %YMM3, %k5 ++ kord %k4, %k5, %k4 ++ /* Each bit in K3 represents a NULL or a mismatch in YMM1 and ++ YMM3. */ ++ kord %k3, %k4, %k3 ++ ++# ifdef USE_AS_WCSCMP ++ /* NB: Each bit in K1/K3 represents 4-byte element. */ ++ kshiftlw $8, %k3, %k2 ++# else ++ kshiftlq $32, %k3, %k2 ++# endif ++ ++ /* Each bit in K1 represents a NULL or a mismatch. */ ++ korq %k1, %k2, %k1 ++ kmovq %k1, %rdi ++ ++ xorl %r8d, %r8d ++ /* If ECX > VEC_SIZE * 2, skip ECX - (VEC_SIZE * 2) bytes. */ ++ subl $(VEC_SIZE * 2), %ecx ++ jle 1f ++ /* R8 has number of bytes skipped. */ ++ movl %ecx, %r8d ++# ifdef USE_AS_WCSCMP ++ /* NB: Divide shift count by 4 since each bit in K1 represent 4 ++ bytes. */ ++ sarl $2, %ecx ++# endif ++ /* Skip ECX bytes. 
*/ ++ shrq %cl, %rdi ++1: ++ /* Before jumping back to the loop, set ESI to the number of ++ VEC_SIZE * 4 blocks before page crossing. */ ++ movl $(PAGE_SIZE / (VEC_SIZE * 4) - 1), %esi ++ ++ testq %rdi, %rdi ++# ifdef USE_AS_STRNCMP ++ /* At this point, if %rdi value is 0, it already tested ++ VEC_SIZE*4+%r10 byte starting from %rax. This label ++ checks whether strncmp maximum offset reached or not. */ ++ je L(string_nbyte_offset_check) ++# else ++ je L(back_to_loop) ++# endif ++ tzcntq %rdi, %rcx ++# ifdef USE_AS_WCSCMP ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ ++ sall $2, %ecx ++# endif ++ addq %r10, %rcx ++ /* Adjust for number of bytes skipped. */ ++ addq %r8, %rcx ++# ifdef USE_AS_STRNCMP ++ addq $(VEC_SIZE * 2), %rcx ++ subq %rcx, %r11 ++ jbe L(zero) ++# ifdef USE_AS_WCSCMP ++ movq %rax, %rsi ++ xorl %eax, %eax ++ movl (%rsi, %rcx), %edi ++ cmpl (%rdx, %rcx), %edi ++ jne L(wcscmp_return) ++# else ++ movzbl (%rax, %rcx), %eax ++ movzbl (%rdx, %rcx), %edx ++ subl %edx, %eax ++# endif ++# else ++# ifdef USE_AS_WCSCMP ++ movq %rax, %rsi ++ xorl %eax, %eax ++ movl (VEC_SIZE * 2)(%rsi, %rcx), %edi ++ cmpl (VEC_SIZE * 2)(%rdx, %rcx), %edi ++ jne L(wcscmp_return) ++# else ++ movzbl (VEC_SIZE * 2)(%rax, %rcx), %eax ++ movzbl (VEC_SIZE * 2)(%rdx, %rcx), %edx ++ subl %edx, %eax ++# endif ++# endif ++ ret ++ ++# ifdef USE_AS_STRNCMP ++L(string_nbyte_offset_check): ++ leaq (VEC_SIZE * 4)(%r10), %r10 ++ cmpq %r10, %r11 ++ jbe L(zero) ++ jmp L(back_to_loop) ++# endif ++ ++ .p2align 4 ++L(cross_page_loop): ++ /* Check one byte/dword at a time. 
*/ ++# ifdef USE_AS_WCSCMP ++ cmpl %ecx, %eax ++# else ++ subl %ecx, %eax ++# endif ++ jne L(different) ++ addl $SIZE_OF_CHAR, %edx ++ cmpl $(VEC_SIZE * 4), %edx ++ je L(main_loop_header) ++# ifdef USE_AS_STRNCMP ++ cmpq %r11, %rdx ++ jae L(zero) ++# endif ++# ifdef USE_AS_WCSCMP ++ movl (%rdi, %rdx), %eax ++ movl (%rsi, %rdx), %ecx ++# else ++ movzbl (%rdi, %rdx), %eax ++ movzbl (%rsi, %rdx), %ecx ++# endif ++ /* Check null char. */ ++ testl %eax, %eax ++ jne L(cross_page_loop) ++ /* Since %eax == 0, subtract is OK for both SIGNED and UNSIGNED ++ comparisons. */ ++ subl %ecx, %eax ++# ifndef USE_AS_WCSCMP ++L(different): ++# endif ++ ret ++ ++# ifdef USE_AS_WCSCMP ++ .p2align 4 ++L(different): ++ /* Use movl to avoid modifying EFLAGS. */ ++ movl $0, %eax ++ setl %al ++ negl %eax ++ orl $1, %eax ++ ret ++# endif ++ ++# ifdef USE_AS_STRNCMP ++ .p2align 4 ++L(zero): ++ xorl %eax, %eax ++ ret ++ ++ .p2align 4 ++L(char0): ++# ifdef USE_AS_WCSCMP ++ xorl %eax, %eax ++ movl (%rdi), %ecx ++ cmpl (%rsi), %ecx ++ jne L(wcscmp_return) ++# else ++ movzbl (%rsi), %ecx ++ movzbl (%rdi), %eax ++ subl %ecx, %eax ++# endif ++ ret ++# endif ++ ++ .p2align 4 ++L(last_vector): ++ addq %rdx, %rdi ++ addq %rdx, %rsi ++# ifdef USE_AS_STRNCMP ++ subq %rdx, %r11 ++# endif ++ tzcntl %ecx, %edx ++# ifdef USE_AS_WCSCMP ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ ++ sall $2, %edx ++# endif ++# ifdef USE_AS_STRNCMP ++ cmpq %r11, %rdx ++ jae L(zero) ++# endif ++# ifdef USE_AS_WCSCMP ++ xorl %eax, %eax ++ movl (%rdi, %rdx), %ecx ++ cmpl (%rsi, %rdx), %ecx ++ jne L(wcscmp_return) ++# else ++ movzbl (%rdi, %rdx), %eax ++ movzbl (%rsi, %rdx), %edx ++ subl %edx, %eax ++# endif ++ ret ++ ++ /* Comparing on page boundary region requires special treatment: ++ It must done one vector at the time, starting with the wider ++ ymm vector if possible, if not, with xmm. If fetching 16 bytes ++ (xmm) still passes the boundary, byte comparison must be done. 
++ */ ++ .p2align 4 ++L(cross_page): ++ /* Try one ymm vector at a time. */ ++ cmpl $(PAGE_SIZE - VEC_SIZE), %eax ++ jg L(cross_page_1_vector) ++L(loop_1_vector): ++ VMOVU (%rdi, %rdx), %YMM0 ++ VMOVU (%rsi, %rdx), %YMM1 ++ ++ /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */ ++ VPCMP $4, %YMM0, %YMM1, %k0 ++ VPCMP $0, %YMMZERO, %YMM0, %k1 ++ VPCMP $0, %YMMZERO, %YMM1, %k2 ++ /* Each bit in K1 represents a NULL in YMM0 or YMM1. */ ++ kord %k1, %k2, %k1 ++ /* Each bit in K1 represents a NULL or a mismatch. */ ++ kord %k0, %k1, %k1 ++ kmovd %k1, %ecx ++ testl %ecx, %ecx ++ jne L(last_vector) ++ ++ addl $VEC_SIZE, %edx ++ ++ addl $VEC_SIZE, %eax ++# ifdef USE_AS_STRNCMP ++ /* Return 0 if the current offset (%rdx) >= the maximum offset ++ (%r11). */ ++ cmpq %r11, %rdx ++ jae L(zero) ++# endif ++ cmpl $(PAGE_SIZE - VEC_SIZE), %eax ++ jle L(loop_1_vector) ++L(cross_page_1_vector): ++ /* Less than 32 bytes to check, try one xmm vector. */ ++ cmpl $(PAGE_SIZE - 16), %eax ++ jg L(cross_page_1_xmm) ++ VMOVU (%rdi, %rdx), %XMM0 ++ VMOVU (%rsi, %rdx), %XMM1 ++ ++ /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */ ++ VPCMP $4, %XMM0, %XMM1, %k0 ++ VPCMP $0, %XMMZERO, %XMM0, %k1 ++ VPCMP $0, %XMMZERO, %XMM1, %k2 ++ /* Each bit in K1 represents a NULL in XMM0 or XMM1. */ ++ korw %k1, %k2, %k1 ++ /* Each bit in K1 represents a NULL or a mismatch. */ ++ korw %k0, %k1, %k1 ++ kmovw %k1, %ecx ++ testl %ecx, %ecx ++ jne L(last_vector) ++ ++ addl $16, %edx ++# ifndef USE_AS_WCSCMP ++ addl $16, %eax ++# endif ++# ifdef USE_AS_STRNCMP ++ /* Return 0 if the current offset (%rdx) >= the maximum offset ++ (%r11). */ ++ cmpq %r11, %rdx ++ jae L(zero) ++# endif ++ ++L(cross_page_1_xmm): ++# ifndef USE_AS_WCSCMP ++ /* Less than 16 bytes to check, try 8 byte vector. NB: No need ++ for wcscmp nor wcsncmp since wide char is 4 bytes. 
*/ ++ cmpl $(PAGE_SIZE - 8), %eax ++ jg L(cross_page_8bytes) ++ vmovq (%rdi, %rdx), %XMM0 ++ vmovq (%rsi, %rdx), %XMM1 ++ ++ /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */ ++ VPCMP $4, %XMM0, %XMM1, %k0 ++ VPCMP $0, %XMMZERO, %XMM0, %k1 ++ VPCMP $0, %XMMZERO, %XMM1, %k2 ++ /* Each bit in K1 represents a NULL in XMM0 or XMM1. */ ++ kord %k1, %k2, %k1 ++ /* Each bit in K1 represents a NULL or a mismatch. */ ++ kord %k0, %k1, %k1 ++ kmovd %k1, %ecx ++ ++# ifdef USE_AS_WCSCMP ++ /* Only last 2 bits are valid. */ ++ andl $0x3, %ecx ++# else ++ /* Only last 8 bits are valid. */ ++ andl $0xff, %ecx ++# endif ++ ++ testl %ecx, %ecx ++ jne L(last_vector) ++ ++ addl $8, %edx ++ addl $8, %eax ++# ifdef USE_AS_STRNCMP ++ /* Return 0 if the current offset (%rdx) >= the maximum offset ++ (%r11). */ ++ cmpq %r11, %rdx ++ jae L(zero) ++# endif ++ ++L(cross_page_8bytes): ++ /* Less than 8 bytes to check, try 4 byte vector. */ ++ cmpl $(PAGE_SIZE - 4), %eax ++ jg L(cross_page_4bytes) ++ vmovd (%rdi, %rdx), %XMM0 ++ vmovd (%rsi, %rdx), %XMM1 ++ ++ /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */ ++ VPCMP $4, %XMM0, %XMM1, %k0 ++ VPCMP $0, %XMMZERO, %XMM0, %k1 ++ VPCMP $0, %XMMZERO, %XMM1, %k2 ++ /* Each bit in K1 represents a NULL in XMM0 or XMM1. */ ++ kord %k1, %k2, %k1 ++ /* Each bit in K1 represents a NULL or a mismatch. */ ++ kord %k0, %k1, %k1 ++ kmovd %k1, %ecx ++ ++# ifdef USE_AS_WCSCMP ++ /* Only the last bit is valid. */ ++ andl $0x1, %ecx ++# else ++ /* Only last 4 bits are valid. */ ++ andl $0xf, %ecx ++# endif ++ ++ testl %ecx, %ecx ++ jne L(last_vector) ++ ++ addl $4, %edx ++# ifdef USE_AS_STRNCMP ++ /* Return 0 if the current offset (%rdx) >= the maximum offset ++ (%r11). */ ++ cmpq %r11, %rdx ++ jae L(zero) ++# endif ++ ++L(cross_page_4bytes): ++# endif ++ /* Less than 4 bytes to check, try one byte/dword at a time. 
*/ ++# ifdef USE_AS_STRNCMP ++ cmpq %r11, %rdx ++ jae L(zero) ++# endif ++# ifdef USE_AS_WCSCMP ++ movl (%rdi, %rdx), %eax ++ movl (%rsi, %rdx), %ecx ++# else ++ movzbl (%rdi, %rdx), %eax ++ movzbl (%rsi, %rdx), %ecx ++# endif ++ testl %eax, %eax ++ jne L(cross_page_loop) ++ subl %ecx, %eax ++ ret ++END (STRCMP) ++#endif +diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c +index 16ae72a4c8..df4ba875d9 100644 +--- a/sysdeps/x86_64/multiarch/strcmp.c ++++ b/sysdeps/x86_64/multiarch/strcmp.c +@@ -30,16 +30,29 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + + static inline void * + IFUNC_SELECTOR (void) + { + const struct cpu_features* cpu_features = __get_cpu_features (); + +- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) +- && CPU_FEATURE_USABLE_P (cpu_features, AVX2) ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) + && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) +- return OPTIMIZE (avx2); ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) ++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) ++ && CPU_FEATURE_USABLE_P (cpu_features, BMI2) ++ && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP)) ++ return OPTIMIZE (evex); ++ ++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) ++ return OPTIMIZE (avx2_rtm); ++ ++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) ++ return OPTIMIZE (avx2); ++ } + + if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load)) + return OPTIMIZE (sse2_unaligned); +diff --git a/sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S +new file mode 
100644 +index 0000000000..c2c581ecf7 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S +@@ -0,0 +1,12 @@ ++#ifndef STRCPY ++# define STRCPY __strcpy_avx2_rtm ++#endif ++ ++#define ZERO_UPPER_VEC_REGISTERS_RETURN \ ++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST ++ ++#define VZEROUPPER_RETURN jmp L(return_vzeroupper) ++ ++#define SECTION(p) p##.avx.rtm ++ ++#include "strcpy-avx2.S" +diff --git a/sysdeps/x86_64/multiarch/strcpy-avx2.S b/sysdeps/x86_64/multiarch/strcpy-avx2.S +index 3f2f9e8170..1ce17253ab 100644 +--- a/sysdeps/x86_64/multiarch/strcpy-avx2.S ++++ b/sysdeps/x86_64/multiarch/strcpy-avx2.S +@@ -37,6 +37,10 @@ + # define VZEROUPPER vzeroupper + # endif + ++# ifndef SECTION ++# define SECTION(p) p##.avx ++# endif ++ + /* zero register */ + #define xmmZ xmm0 + #define ymmZ ymm0 +@@ -46,7 +50,7 @@ + + # ifndef USE_AS_STRCAT + +- .section .text.avx,"ax",@progbits ++ .section SECTION(.text),"ax",@progbits + ENTRY (STRCPY) + # ifdef USE_AS_STRNCPY + mov %RDX_LP, %R8_LP +@@ -369,8 +373,8 @@ L(CopyVecSizeExit): + lea 1(%rdi), %rdi + jnz L(StrncpyFillTailWithZero) + # endif +- VZEROUPPER +- ret ++L(return_vzeroupper): ++ ZERO_UPPER_VEC_REGISTERS_RETURN + + .p2align 4 + L(CopyTwoVecSize1): +@@ -553,8 +557,7 @@ L(Exit1): + lea 2(%rdi), %rdi + jnz L(StrncpyFillTailWithZero) + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(Exit2): +@@ -569,8 +572,7 @@ L(Exit2): + lea 3(%rdi), %rdi + jnz L(StrncpyFillTailWithZero) + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(Exit3): +@@ -584,8 +586,7 @@ L(Exit3): + lea 4(%rdi), %rdi + jnz L(StrncpyFillTailWithZero) + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(Exit4_7): +@@ -602,8 +603,7 @@ L(Exit4_7): + lea 1(%rdi, %rdx), %rdi + jnz L(StrncpyFillTailWithZero) + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(Exit8_15): +@@ -620,8 +620,7 @@ L(Exit8_15): + lea 1(%rdi, %rdx), %rdi + jnz L(StrncpyFillTailWithZero) + # endif +- VZEROUPPER 
+- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(Exit16_31): +@@ -638,8 +637,7 @@ L(Exit16_31): + lea 1(%rdi, %rdx), %rdi + jnz L(StrncpyFillTailWithZero) + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(Exit32_63): +@@ -656,8 +654,7 @@ L(Exit32_63): + lea 1(%rdi, %rdx), %rdi + jnz L(StrncpyFillTailWithZero) + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + # ifdef USE_AS_STRNCPY + +@@ -671,8 +668,7 @@ L(StrncpyExit1): + # ifdef USE_AS_STRCAT + movb $0, 1(%rdi) + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(StrncpyExit2): +@@ -684,8 +680,7 @@ L(StrncpyExit2): + # ifdef USE_AS_STRCAT + movb $0, 2(%rdi) + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(StrncpyExit3_4): +@@ -699,8 +694,7 @@ L(StrncpyExit3_4): + # ifdef USE_AS_STRCAT + movb $0, (%rdi, %r8) + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(StrncpyExit5_8): +@@ -714,8 +708,7 @@ L(StrncpyExit5_8): + # ifdef USE_AS_STRCAT + movb $0, (%rdi, %r8) + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(StrncpyExit9_16): +@@ -729,8 +722,7 @@ L(StrncpyExit9_16): + # ifdef USE_AS_STRCAT + movb $0, (%rdi, %r8) + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(StrncpyExit17_32): +@@ -744,8 +736,7 @@ L(StrncpyExit17_32): + # ifdef USE_AS_STRCAT + movb $0, (%rdi, %r8) + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(StrncpyExit33_64): +@@ -760,8 +751,7 @@ L(StrncpyExit33_64): + # ifdef USE_AS_STRCAT + movb $0, (%rdi, %r8) + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(StrncpyExit65): +@@ -778,50 +768,43 @@ L(StrncpyExit65): + # ifdef USE_AS_STRCAT + movb $0, 65(%rdi) + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + # ifndef USE_AS_STRCAT + + .p2align 4 + L(Fill1): + mov %dl, (%rdi) +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(Fill2): + mov %dx, (%rdi) +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(Fill3_4): + 
mov %dx, (%rdi) + mov %dx, -2(%rdi, %r8) +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(Fill5_8): + mov %edx, (%rdi) + mov %edx, -4(%rdi, %r8) +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(Fill9_16): + mov %rdx, (%rdi) + mov %rdx, -8(%rdi, %r8) +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(Fill17_32): + vmovdqu %xmmZ, (%rdi) + vmovdqu %xmmZ, -16(%rdi, %r8) +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(CopyVecSizeUnalignedVec2): +@@ -898,8 +881,7 @@ L(Fill): + cmp $1, %r8d + ja L(Fill2) + je L(Fill1) +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + /* end of ifndef USE_AS_STRCAT */ + # endif +@@ -929,8 +911,7 @@ L(UnalignedFourVecSizeLeaveCase3): + # ifdef USE_AS_STRCAT + movb $0, (VEC_SIZE * 4)(%rdi) + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(UnalignedFourVecSizeLeaveCase2): +@@ -1001,16 +982,14 @@ L(StrncpyExit): + # ifdef USE_AS_STRCAT + movb $0, (%rdi) + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(ExitZero): + # ifndef USE_AS_STRCAT + mov %rdi, %rax + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + # endif + +diff --git a/sysdeps/x86_64/multiarch/strcpy-evex.S b/sysdeps/x86_64/multiarch/strcpy-evex.S +new file mode 100644 +index 0000000000..a343a1a692 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strcpy-evex.S +@@ -0,0 +1,1003 @@ ++/* strcpy with 256-bit EVEX instructions. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#if IS_IN (libc) ++ ++# ifndef USE_AS_STRCAT ++# include <sysdep.h> ++ ++# ifndef STRCPY ++# define STRCPY __strcpy_evex ++# endif ++ ++# endif ++ ++# define VMOVU vmovdqu64 ++# define VMOVA vmovdqa64 ++ ++/* Number of bytes in a vector register */ ++# ifndef VEC_SIZE ++# define VEC_SIZE 32 ++# endif ++ ++# define XMM2 xmm18 ++# define XMM3 xmm19 ++ ++# define YMM2 ymm18 ++# define YMM3 ymm19 ++# define YMM4 ymm20 ++# define YMM5 ymm21 ++# define YMM6 ymm22 ++# define YMM7 ymm23 ++ ++# ifndef USE_AS_STRCAT ++ ++/* zero register */ ++# define XMMZERO xmm16 ++# define YMMZERO ymm16 ++# define YMM1 ymm17 ++ ++ .section .text.evex,"ax",@progbits ++ENTRY (STRCPY) ++# ifdef USE_AS_STRNCPY ++ mov %RDX_LP, %R8_LP ++ test %R8_LP, %R8_LP ++ jz L(ExitZero) ++# endif ++ mov %rsi, %rcx ++# ifndef USE_AS_STPCPY ++ mov %rdi, %rax /* save result */ ++# endif ++ ++ vpxorq %XMMZERO, %XMMZERO, %XMMZERO ++# endif ++ ++ and $((VEC_SIZE * 4) - 1), %ecx ++ cmp $(VEC_SIZE * 2), %ecx ++ jbe L(SourceStringAlignmentLessTwoVecSize) ++ ++ and $-VEC_SIZE, %rsi ++ and $(VEC_SIZE - 1), %ecx ++ ++ vpcmpb $0, (%rsi), %YMMZERO, %k0 ++ kmovd %k0, %edx ++ shr %cl, %rdx ++ ++# ifdef USE_AS_STRNCPY ++# if defined USE_AS_STPCPY || defined USE_AS_STRCAT ++ mov $VEC_SIZE, %r10 ++ sub %rcx, %r10 ++ cmp %r10, %r8 ++# else ++ mov $(VEC_SIZE + 1), %r10 ++ sub %rcx, %r10 ++ cmp %r10, %r8 ++# endif ++ jbe L(CopyVecSizeTailCase2OrCase3) ++# endif ++ test %edx, %edx ++ jnz L(CopyVecSizeTail) ++ ++ vpcmpb $0, VEC_SIZE(%rsi), %YMMZERO, %k1 ++ kmovd %k1, %edx ++ ++# ifdef USE_AS_STRNCPY ++ add $VEC_SIZE, %r10 ++ cmp %r10, %r8 ++ jbe L(CopyTwoVecSizeCase2OrCase3) ++# endif ++ test %edx, %edx ++ jnz L(CopyTwoVecSize) ++ ++ VMOVU (%rsi, %rcx), %YMM2 /* copy VEC_SIZE bytes */ ++ VMOVU %YMM2, 
(%rdi) ++ ++/* If source address alignment != destination address alignment */ ++ .p2align 4 ++L(UnalignVecSizeBoth): ++ sub %rcx, %rdi ++# ifdef USE_AS_STRNCPY ++ add %rcx, %r8 ++ sbb %rcx, %rcx ++ or %rcx, %r8 ++# endif ++ mov $VEC_SIZE, %rcx ++ VMOVA (%rsi, %rcx), %YMM2 ++ VMOVU %YMM2, (%rdi, %rcx) ++ VMOVA VEC_SIZE(%rsi, %rcx), %YMM2 ++ vpcmpb $0, %YMM2, %YMMZERO, %k0 ++ kmovd %k0, %edx ++ add $VEC_SIZE, %rcx ++# ifdef USE_AS_STRNCPY ++ sub $(VEC_SIZE * 3), %r8 ++ jbe L(CopyVecSizeCase2OrCase3) ++# endif ++ test %edx, %edx ++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT ++ jnz L(CopyVecSizeUnalignedVec2) ++# else ++ jnz L(CopyVecSize) ++# endif ++ ++ VMOVU %YMM2, (%rdi, %rcx) ++ VMOVA VEC_SIZE(%rsi, %rcx), %YMM3 ++ vpcmpb $0, %YMM3, %YMMZERO, %k0 ++ kmovd %k0, %edx ++ add $VEC_SIZE, %rcx ++# ifdef USE_AS_STRNCPY ++ sub $VEC_SIZE, %r8 ++ jbe L(CopyVecSizeCase2OrCase3) ++# endif ++ test %edx, %edx ++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT ++ jnz L(CopyVecSizeUnalignedVec3) ++# else ++ jnz L(CopyVecSize) ++# endif ++ ++ VMOVU %YMM3, (%rdi, %rcx) ++ VMOVA VEC_SIZE(%rsi, %rcx), %YMM4 ++ vpcmpb $0, %YMM4, %YMMZERO, %k0 ++ kmovd %k0, %edx ++ add $VEC_SIZE, %rcx ++# ifdef USE_AS_STRNCPY ++ sub $VEC_SIZE, %r8 ++ jbe L(CopyVecSizeCase2OrCase3) ++# endif ++ test %edx, %edx ++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT ++ jnz L(CopyVecSizeUnalignedVec4) ++# else ++ jnz L(CopyVecSize) ++# endif ++ ++ VMOVU %YMM4, (%rdi, %rcx) ++ VMOVA VEC_SIZE(%rsi, %rcx), %YMM2 ++ vpcmpb $0, %YMM2, %YMMZERO, %k0 ++ kmovd %k0, %edx ++ add $VEC_SIZE, %rcx ++# ifdef USE_AS_STRNCPY ++ sub $VEC_SIZE, %r8 ++ jbe L(CopyVecSizeCase2OrCase3) ++# endif ++ test %edx, %edx ++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT ++ jnz L(CopyVecSizeUnalignedVec2) ++# else ++ jnz L(CopyVecSize) ++# endif ++ ++ VMOVU %YMM2, (%rdi, %rcx) ++ VMOVA VEC_SIZE(%rsi, %rcx), %YMM2 ++ vpcmpb $0, %YMM2, %YMMZERO, %k0 ++ kmovd %k0, %edx ++ add $VEC_SIZE, %rcx ++# ifdef USE_AS_STRNCPY 
++ sub $VEC_SIZE, %r8 ++ jbe L(CopyVecSizeCase2OrCase3) ++# endif ++ test %edx, %edx ++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT ++ jnz L(CopyVecSizeUnalignedVec2) ++# else ++ jnz L(CopyVecSize) ++# endif ++ ++ VMOVA VEC_SIZE(%rsi, %rcx), %YMM3 ++ VMOVU %YMM2, (%rdi, %rcx) ++ vpcmpb $0, %YMM3, %YMMZERO, %k0 ++ kmovd %k0, %edx ++ add $VEC_SIZE, %rcx ++# ifdef USE_AS_STRNCPY ++ sub $VEC_SIZE, %r8 ++ jbe L(CopyVecSizeCase2OrCase3) ++# endif ++ test %edx, %edx ++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT ++ jnz L(CopyVecSizeUnalignedVec3) ++# else ++ jnz L(CopyVecSize) ++# endif ++ ++ VMOVU %YMM3, (%rdi, %rcx) ++ mov %rsi, %rdx ++ lea VEC_SIZE(%rsi, %rcx), %rsi ++ and $-(VEC_SIZE * 4), %rsi ++ sub %rsi, %rdx ++ sub %rdx, %rdi ++# ifdef USE_AS_STRNCPY ++ lea (VEC_SIZE * 8)(%r8, %rdx), %r8 ++# endif ++L(UnalignedFourVecSizeLoop): ++ VMOVA (%rsi), %YMM4 ++ VMOVA VEC_SIZE(%rsi), %YMM5 ++ VMOVA (VEC_SIZE * 2)(%rsi), %YMM6 ++ VMOVA (VEC_SIZE * 3)(%rsi), %YMM7 ++ vpminub %YMM5, %YMM4, %YMM2 ++ vpminub %YMM7, %YMM6, %YMM3 ++ vpminub %YMM2, %YMM3, %YMM2 ++ /* If K7 != 0, there is a null byte. */ ++ vpcmpb $0, %YMM2, %YMMZERO, %k7 ++ kmovd %k7, %edx ++# ifdef USE_AS_STRNCPY ++ sub $(VEC_SIZE * 4), %r8 ++ jbe L(UnalignedLeaveCase2OrCase3) ++# endif ++ test %edx, %edx ++ jnz L(UnalignedFourVecSizeLeave) ++ ++L(UnalignedFourVecSizeLoop_start): ++ add $(VEC_SIZE * 4), %rdi ++ add $(VEC_SIZE * 4), %rsi ++ VMOVU %YMM4, -(VEC_SIZE * 4)(%rdi) ++ VMOVA (%rsi), %YMM4 ++ VMOVU %YMM5, -(VEC_SIZE * 3)(%rdi) ++ VMOVA VEC_SIZE(%rsi), %YMM5 ++ vpminub %YMM5, %YMM4, %YMM2 ++ VMOVU %YMM6, -(VEC_SIZE * 2)(%rdi) ++ VMOVA (VEC_SIZE * 2)(%rsi), %YMM6 ++ VMOVU %YMM7, -VEC_SIZE(%rdi) ++ VMOVA (VEC_SIZE * 3)(%rsi), %YMM7 ++ vpminub %YMM7, %YMM6, %YMM3 ++ vpminub %YMM2, %YMM3, %YMM2 ++ /* If K7 != 0, there is a null byte. 
*/ ++ vpcmpb $0, %YMM2, %YMMZERO, %k7 ++ kmovd %k7, %edx ++# ifdef USE_AS_STRNCPY ++ sub $(VEC_SIZE * 4), %r8 ++ jbe L(UnalignedLeaveCase2OrCase3) ++# endif ++ test %edx, %edx ++ jz L(UnalignedFourVecSizeLoop_start) ++ ++L(UnalignedFourVecSizeLeave): ++ vpcmpb $0, %YMM4, %YMMZERO, %k1 ++ kmovd %k1, %edx ++ test %edx, %edx ++ jnz L(CopyVecSizeUnaligned_0) ++ ++ vpcmpb $0, %YMM5, %YMMZERO, %k2 ++ kmovd %k2, %ecx ++ test %ecx, %ecx ++ jnz L(CopyVecSizeUnaligned_16) ++ ++ vpcmpb $0, %YMM6, %YMMZERO, %k3 ++ kmovd %k3, %edx ++ test %edx, %edx ++ jnz L(CopyVecSizeUnaligned_32) ++ ++ vpcmpb $0, %YMM7, %YMMZERO, %k4 ++ kmovd %k4, %ecx ++ bsf %ecx, %edx ++ VMOVU %YMM4, (%rdi) ++ VMOVU %YMM5, VEC_SIZE(%rdi) ++ VMOVU %YMM6, (VEC_SIZE * 2)(%rdi) ++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT ++# ifdef USE_AS_STPCPY ++ lea (VEC_SIZE * 3)(%rdi, %rdx), %rax ++# endif ++ VMOVU %YMM7, (VEC_SIZE * 3)(%rdi) ++ add $(VEC_SIZE - 1), %r8 ++ sub %rdx, %r8 ++ lea ((VEC_SIZE * 3) + 1)(%rdi, %rdx), %rdi ++ jmp L(StrncpyFillTailWithZero) ++# else ++ add $(VEC_SIZE * 3), %rsi ++ add $(VEC_SIZE * 3), %rdi ++ jmp L(CopyVecSizeExit) ++# endif ++ ++/* If source address alignment == destination address alignment */ ++ ++L(SourceStringAlignmentLessTwoVecSize): ++ VMOVU (%rsi), %YMM3 ++ VMOVU VEC_SIZE(%rsi), %YMM2 ++ vpcmpb $0, %YMM3, %YMMZERO, %k0 ++ kmovd %k0, %edx ++ ++# ifdef USE_AS_STRNCPY ++# if defined USE_AS_STPCPY || defined USE_AS_STRCAT ++ cmp $VEC_SIZE, %r8 ++# else ++ cmp $(VEC_SIZE + 1), %r8 ++# endif ++ jbe L(CopyVecSizeTail1Case2OrCase3) ++# endif ++ test %edx, %edx ++ jnz L(CopyVecSizeTail1) ++ ++ VMOVU %YMM3, (%rdi) ++ vpcmpb $0, %YMM2, %YMMZERO, %k0 ++ kmovd %k0, %edx ++ ++# ifdef USE_AS_STRNCPY ++# if defined USE_AS_STPCPY || defined USE_AS_STRCAT ++ cmp $(VEC_SIZE * 2), %r8 ++# else ++ cmp $((VEC_SIZE * 2) + 1), %r8 ++# endif ++ jbe L(CopyTwoVecSize1Case2OrCase3) ++# endif ++ test %edx, %edx ++ jnz L(CopyTwoVecSize1) ++ ++ and $-VEC_SIZE, %rsi ++ and $(VEC_SIZE - 1), 
%ecx ++ jmp L(UnalignVecSizeBoth) ++ ++/*------End of main part with loops---------------------*/ ++ ++/* Case1 */ ++ ++# if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT) ++ .p2align 4 ++L(CopyVecSize): ++ add %rcx, %rdi ++# endif ++L(CopyVecSizeTail): ++ add %rcx, %rsi ++L(CopyVecSizeTail1): ++ bsf %edx, %edx ++L(CopyVecSizeExit): ++ cmp $32, %edx ++ jae L(Exit32_63) ++ cmp $16, %edx ++ jae L(Exit16_31) ++ cmp $8, %edx ++ jae L(Exit8_15) ++ cmp $4, %edx ++ jae L(Exit4_7) ++ cmp $3, %edx ++ je L(Exit3) ++ cmp $1, %edx ++ ja L(Exit2) ++ je L(Exit1) ++ movb $0, (%rdi) ++# ifdef USE_AS_STPCPY ++ lea (%rdi), %rax ++# endif ++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT ++ sub $1, %r8 ++ lea 1(%rdi), %rdi ++ jnz L(StrncpyFillTailWithZero) ++# endif ++ ret ++ ++ .p2align 4 ++L(CopyTwoVecSize1): ++ add $VEC_SIZE, %rsi ++ add $VEC_SIZE, %rdi ++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT ++ sub $VEC_SIZE, %r8 ++# endif ++ jmp L(CopyVecSizeTail1) ++ ++ .p2align 4 ++L(CopyTwoVecSize): ++ bsf %edx, %edx ++ add %rcx, %rsi ++ add $VEC_SIZE, %edx ++ sub %ecx, %edx ++ jmp L(CopyVecSizeExit) ++ ++ .p2align 4 ++L(CopyVecSizeUnaligned_0): ++ bsf %edx, %edx ++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT ++# ifdef USE_AS_STPCPY ++ lea (%rdi, %rdx), %rax ++# endif ++ VMOVU %YMM4, (%rdi) ++ add $((VEC_SIZE * 4) - 1), %r8 ++ sub %rdx, %r8 ++ lea 1(%rdi, %rdx), %rdi ++ jmp L(StrncpyFillTailWithZero) ++# else ++ jmp L(CopyVecSizeExit) ++# endif ++ ++ .p2align 4 ++L(CopyVecSizeUnaligned_16): ++ bsf %ecx, %edx ++ VMOVU %YMM4, (%rdi) ++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT ++# ifdef USE_AS_STPCPY ++ lea VEC_SIZE(%rdi, %rdx), %rax ++# endif ++ VMOVU %YMM5, VEC_SIZE(%rdi) ++ add $((VEC_SIZE * 3) - 1), %r8 ++ sub %rdx, %r8 ++ lea (VEC_SIZE + 1)(%rdi, %rdx), %rdi ++ jmp L(StrncpyFillTailWithZero) ++# else ++ add $VEC_SIZE, %rsi ++ add $VEC_SIZE, %rdi ++ jmp L(CopyVecSizeExit) ++# endif ++ ++ .p2align 4 ++L(CopyVecSizeUnaligned_32): ++ bsf %edx, 
%edx ++ VMOVU %YMM4, (%rdi) ++ VMOVU %YMM5, VEC_SIZE(%rdi) ++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT ++# ifdef USE_AS_STPCPY ++ lea (VEC_SIZE * 2)(%rdi, %rdx), %rax ++# endif ++ VMOVU %YMM6, (VEC_SIZE * 2)(%rdi) ++ add $((VEC_SIZE * 2) - 1), %r8 ++ sub %rdx, %r8 ++ lea ((VEC_SIZE * 2) + 1)(%rdi, %rdx), %rdi ++ jmp L(StrncpyFillTailWithZero) ++# else ++ add $(VEC_SIZE * 2), %rsi ++ add $(VEC_SIZE * 2), %rdi ++ jmp L(CopyVecSizeExit) ++# endif ++ ++# ifdef USE_AS_STRNCPY ++# ifndef USE_AS_STRCAT ++ .p2align 4 ++L(CopyVecSizeUnalignedVec6): ++ VMOVU %YMM6, (%rdi, %rcx) ++ jmp L(CopyVecSizeVecExit) ++ ++ .p2align 4 ++L(CopyVecSizeUnalignedVec5): ++ VMOVU %YMM5, (%rdi, %rcx) ++ jmp L(CopyVecSizeVecExit) ++ ++ .p2align 4 ++L(CopyVecSizeUnalignedVec4): ++ VMOVU %YMM4, (%rdi, %rcx) ++ jmp L(CopyVecSizeVecExit) ++ ++ .p2align 4 ++L(CopyVecSizeUnalignedVec3): ++ VMOVU %YMM3, (%rdi, %rcx) ++ jmp L(CopyVecSizeVecExit) ++# endif ++ ++/* Case2 */ ++ ++ .p2align 4 ++L(CopyVecSizeCase2): ++ add $VEC_SIZE, %r8 ++ add %rcx, %rdi ++ add %rcx, %rsi ++ bsf %edx, %edx ++ cmp %r8d, %edx ++ jb L(CopyVecSizeExit) ++ jmp L(StrncpyExit) ++ ++ .p2align 4 ++L(CopyTwoVecSizeCase2): ++ add %rcx, %rsi ++ bsf %edx, %edx ++ add $VEC_SIZE, %edx ++ sub %ecx, %edx ++ cmp %r8d, %edx ++ jb L(CopyVecSizeExit) ++ jmp L(StrncpyExit) ++ ++L(CopyVecSizeTailCase2): ++ add %rcx, %rsi ++ bsf %edx, %edx ++ cmp %r8d, %edx ++ jb L(CopyVecSizeExit) ++ jmp L(StrncpyExit) ++ ++L(CopyVecSizeTail1Case2): ++ bsf %edx, %edx ++ cmp %r8d, %edx ++ jb L(CopyVecSizeExit) ++ jmp L(StrncpyExit) ++ ++/* Case2 or Case3, Case3 */ ++ ++ .p2align 4 ++L(CopyVecSizeCase2OrCase3): ++ test %rdx, %rdx ++ jnz L(CopyVecSizeCase2) ++L(CopyVecSizeCase3): ++ add $VEC_SIZE, %r8 ++ add %rcx, %rdi ++ add %rcx, %rsi ++ jmp L(StrncpyExit) ++ ++ .p2align 4 ++L(CopyTwoVecSizeCase2OrCase3): ++ test %rdx, %rdx ++ jnz L(CopyTwoVecSizeCase2) ++ add %rcx, %rsi ++ jmp L(StrncpyExit) ++ ++ .p2align 4 ++L(CopyVecSizeTailCase2OrCase3): ++ test 
%rdx, %rdx ++ jnz L(CopyVecSizeTailCase2) ++ add %rcx, %rsi ++ jmp L(StrncpyExit) ++ ++ .p2align 4 ++L(CopyTwoVecSize1Case2OrCase3): ++ add $VEC_SIZE, %rdi ++ add $VEC_SIZE, %rsi ++ sub $VEC_SIZE, %r8 ++L(CopyVecSizeTail1Case2OrCase3): ++ test %rdx, %rdx ++ jnz L(CopyVecSizeTail1Case2) ++ jmp L(StrncpyExit) ++# endif ++ ++/*------------End labels regarding with copying 1-VEC_SIZE bytes--and 1-(VEC_SIZE*2) bytes----*/ ++ ++ .p2align 4 ++L(Exit1): ++ movzwl (%rsi), %edx ++ mov %dx, (%rdi) ++# ifdef USE_AS_STPCPY ++ lea 1(%rdi), %rax ++# endif ++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT ++ sub $2, %r8 ++ lea 2(%rdi), %rdi ++ jnz L(StrncpyFillTailWithZero) ++# endif ++ ret ++ ++ .p2align 4 ++L(Exit2): ++ movzwl (%rsi), %ecx ++ mov %cx, (%rdi) ++ movb $0, 2(%rdi) ++# ifdef USE_AS_STPCPY ++ lea 2(%rdi), %rax ++# endif ++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT ++ sub $3, %r8 ++ lea 3(%rdi), %rdi ++ jnz L(StrncpyFillTailWithZero) ++# endif ++ ret ++ ++ .p2align 4 ++L(Exit3): ++ mov (%rsi), %edx ++ mov %edx, (%rdi) ++# ifdef USE_AS_STPCPY ++ lea 3(%rdi), %rax ++# endif ++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT ++ sub $4, %r8 ++ lea 4(%rdi), %rdi ++ jnz L(StrncpyFillTailWithZero) ++# endif ++ ret ++ ++ .p2align 4 ++L(Exit4_7): ++ mov (%rsi), %ecx ++ mov %ecx, (%rdi) ++ mov -3(%rsi, %rdx), %ecx ++ mov %ecx, -3(%rdi, %rdx) ++# ifdef USE_AS_STPCPY ++ lea (%rdi, %rdx), %rax ++# endif ++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT ++ sub %rdx, %r8 ++ sub $1, %r8 ++ lea 1(%rdi, %rdx), %rdi ++ jnz L(StrncpyFillTailWithZero) ++# endif ++ ret ++ ++ .p2align 4 ++L(Exit8_15): ++ mov (%rsi), %rcx ++ mov -7(%rsi, %rdx), %r9 ++ mov %rcx, (%rdi) ++ mov %r9, -7(%rdi, %rdx) ++# ifdef USE_AS_STPCPY ++ lea (%rdi, %rdx), %rax ++# endif ++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT ++ sub %rdx, %r8 ++ sub $1, %r8 ++ lea 1(%rdi, %rdx), %rdi ++ jnz L(StrncpyFillTailWithZero) ++# endif ++ ret ++ ++ .p2align 4 ++L(Exit16_31): ++ VMOVU 
(%rsi), %XMM2 ++ VMOVU -15(%rsi, %rdx), %XMM3 ++ VMOVU %XMM2, (%rdi) ++ VMOVU %XMM3, -15(%rdi, %rdx) ++# ifdef USE_AS_STPCPY ++ lea (%rdi, %rdx), %rax ++# endif ++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT ++ sub %rdx, %r8 ++ sub $1, %r8 ++ lea 1(%rdi, %rdx), %rdi ++ jnz L(StrncpyFillTailWithZero) ++# endif ++ ret ++ ++ .p2align 4 ++L(Exit32_63): ++ VMOVU (%rsi), %YMM2 ++ VMOVU -31(%rsi, %rdx), %YMM3 ++ VMOVU %YMM2, (%rdi) ++ VMOVU %YMM3, -31(%rdi, %rdx) ++# ifdef USE_AS_STPCPY ++ lea (%rdi, %rdx), %rax ++# endif ++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT ++ sub %rdx, %r8 ++ sub $1, %r8 ++ lea 1(%rdi, %rdx), %rdi ++ jnz L(StrncpyFillTailWithZero) ++# endif ++ ret ++ ++# ifdef USE_AS_STRNCPY ++ ++ .p2align 4 ++L(StrncpyExit1): ++ movzbl (%rsi), %edx ++ mov %dl, (%rdi) ++# ifdef USE_AS_STPCPY ++ lea 1(%rdi), %rax ++# endif ++# ifdef USE_AS_STRCAT ++ movb $0, 1(%rdi) ++# endif ++ ret ++ ++ .p2align 4 ++L(StrncpyExit2): ++ movzwl (%rsi), %edx ++ mov %dx, (%rdi) ++# ifdef USE_AS_STPCPY ++ lea 2(%rdi), %rax ++# endif ++# ifdef USE_AS_STRCAT ++ movb $0, 2(%rdi) ++# endif ++ ret ++ ++ .p2align 4 ++L(StrncpyExit3_4): ++ movzwl (%rsi), %ecx ++ movzwl -2(%rsi, %r8), %edx ++ mov %cx, (%rdi) ++ mov %dx, -2(%rdi, %r8) ++# ifdef USE_AS_STPCPY ++ lea (%rdi, %r8), %rax ++# endif ++# ifdef USE_AS_STRCAT ++ movb $0, (%rdi, %r8) ++# endif ++ ret ++ ++ .p2align 4 ++L(StrncpyExit5_8): ++ mov (%rsi), %ecx ++ mov -4(%rsi, %r8), %edx ++ mov %ecx, (%rdi) ++ mov %edx, -4(%rdi, %r8) ++# ifdef USE_AS_STPCPY ++ lea (%rdi, %r8), %rax ++# endif ++# ifdef USE_AS_STRCAT ++ movb $0, (%rdi, %r8) ++# endif ++ ret ++ ++ .p2align 4 ++L(StrncpyExit9_16): ++ mov (%rsi), %rcx ++ mov -8(%rsi, %r8), %rdx ++ mov %rcx, (%rdi) ++ mov %rdx, -8(%rdi, %r8) ++# ifdef USE_AS_STPCPY ++ lea (%rdi, %r8), %rax ++# endif ++# ifdef USE_AS_STRCAT ++ movb $0, (%rdi, %r8) ++# endif ++ ret ++ ++ .p2align 4 ++L(StrncpyExit17_32): ++ VMOVU (%rsi), %XMM2 ++ VMOVU -16(%rsi, %r8), %XMM3 ++ VMOVU %XMM2, 
(%rdi) ++ VMOVU %XMM3, -16(%rdi, %r8) ++# ifdef USE_AS_STPCPY ++ lea (%rdi, %r8), %rax ++# endif ++# ifdef USE_AS_STRCAT ++ movb $0, (%rdi, %r8) ++# endif ++ ret ++ ++ .p2align 4 ++L(StrncpyExit33_64): ++ /* 0/32, 31/16 */ ++ VMOVU (%rsi), %YMM2 ++ VMOVU -VEC_SIZE(%rsi, %r8), %YMM3 ++ VMOVU %YMM2, (%rdi) ++ VMOVU %YMM3, -VEC_SIZE(%rdi, %r8) ++# ifdef USE_AS_STPCPY ++ lea (%rdi, %r8), %rax ++# endif ++# ifdef USE_AS_STRCAT ++ movb $0, (%rdi, %r8) ++# endif ++ ret ++ ++ .p2align 4 ++L(StrncpyExit65): ++ /* 0/32, 32/32, 64/1 */ ++ VMOVU (%rsi), %YMM2 ++ VMOVU 32(%rsi), %YMM3 ++ mov 64(%rsi), %cl ++ VMOVU %YMM2, (%rdi) ++ VMOVU %YMM3, 32(%rdi) ++ mov %cl, 64(%rdi) ++# ifdef USE_AS_STPCPY ++ lea 65(%rdi), %rax ++# endif ++# ifdef USE_AS_STRCAT ++ movb $0, 65(%rdi) ++# endif ++ ret ++ ++# ifndef USE_AS_STRCAT ++ ++ .p2align 4 ++L(Fill1): ++ mov %dl, (%rdi) ++ ret ++ ++ .p2align 4 ++L(Fill2): ++ mov %dx, (%rdi) ++ ret ++ ++ .p2align 4 ++L(Fill3_4): ++ mov %dx, (%rdi) ++ mov %dx, -2(%rdi, %r8) ++ ret ++ ++ .p2align 4 ++L(Fill5_8): ++ mov %edx, (%rdi) ++ mov %edx, -4(%rdi, %r8) ++ ret ++ ++ .p2align 4 ++L(Fill9_16): ++ mov %rdx, (%rdi) ++ mov %rdx, -8(%rdi, %r8) ++ ret ++ ++ .p2align 4 ++L(Fill17_32): ++ VMOVU %XMMZERO, (%rdi) ++ VMOVU %XMMZERO, -16(%rdi, %r8) ++ ret ++ ++ .p2align 4 ++L(CopyVecSizeUnalignedVec2): ++ VMOVU %YMM2, (%rdi, %rcx) ++ ++ .p2align 4 ++L(CopyVecSizeVecExit): ++ bsf %edx, %edx ++ add $(VEC_SIZE - 1), %r8 ++ add %rcx, %rdi ++# ifdef USE_AS_STPCPY ++ lea (%rdi, %rdx), %rax ++# endif ++ sub %rdx, %r8 ++ lea 1(%rdi, %rdx), %rdi ++ ++ .p2align 4 ++L(StrncpyFillTailWithZero): ++ xor %edx, %edx ++ sub $VEC_SIZE, %r8 ++ jbe L(StrncpyFillExit) ++ ++ VMOVU %YMMZERO, (%rdi) ++ add $VEC_SIZE, %rdi ++ ++ mov %rdi, %rsi ++ and $(VEC_SIZE - 1), %esi ++ sub %rsi, %rdi ++ add %rsi, %r8 ++ sub $(VEC_SIZE * 4), %r8 ++ jb L(StrncpyFillLessFourVecSize) ++ ++L(StrncpyFillLoopVmovdqa): ++ VMOVA %YMMZERO, (%rdi) ++ VMOVA %YMMZERO, VEC_SIZE(%rdi) ++ VMOVA %YMMZERO, 
(VEC_SIZE * 2)(%rdi) ++ VMOVA %YMMZERO, (VEC_SIZE * 3)(%rdi) ++ add $(VEC_SIZE * 4), %rdi ++ sub $(VEC_SIZE * 4), %r8 ++ jae L(StrncpyFillLoopVmovdqa) ++ ++L(StrncpyFillLessFourVecSize): ++ add $(VEC_SIZE * 2), %r8 ++ jl L(StrncpyFillLessTwoVecSize) ++ VMOVA %YMMZERO, (%rdi) ++ VMOVA %YMMZERO, VEC_SIZE(%rdi) ++ add $(VEC_SIZE * 2), %rdi ++ sub $VEC_SIZE, %r8 ++ jl L(StrncpyFillExit) ++ VMOVA %YMMZERO, (%rdi) ++ add $VEC_SIZE, %rdi ++ jmp L(Fill) ++ ++ .p2align 4 ++L(StrncpyFillLessTwoVecSize): ++ add $VEC_SIZE, %r8 ++ jl L(StrncpyFillExit) ++ VMOVA %YMMZERO, (%rdi) ++ add $VEC_SIZE, %rdi ++ jmp L(Fill) ++ ++ .p2align 4 ++L(StrncpyFillExit): ++ add $VEC_SIZE, %r8 ++L(Fill): ++ cmp $17, %r8d ++ jae L(Fill17_32) ++ cmp $9, %r8d ++ jae L(Fill9_16) ++ cmp $5, %r8d ++ jae L(Fill5_8) ++ cmp $3, %r8d ++ jae L(Fill3_4) ++ cmp $1, %r8d ++ ja L(Fill2) ++ je L(Fill1) ++ ret ++ ++/* end of ifndef USE_AS_STRCAT */ ++# endif ++ ++ .p2align 4 ++L(UnalignedLeaveCase2OrCase3): ++ test %rdx, %rdx ++ jnz L(UnalignedFourVecSizeLeaveCase2) ++L(UnalignedFourVecSizeLeaveCase3): ++ lea (VEC_SIZE * 4)(%r8), %rcx ++ and $-VEC_SIZE, %rcx ++ add $(VEC_SIZE * 3), %r8 ++ jl L(CopyVecSizeCase3) ++ VMOVU %YMM4, (%rdi) ++ sub $VEC_SIZE, %r8 ++ jb L(CopyVecSizeCase3) ++ VMOVU %YMM5, VEC_SIZE(%rdi) ++ sub $VEC_SIZE, %r8 ++ jb L(CopyVecSizeCase3) ++ VMOVU %YMM6, (VEC_SIZE * 2)(%rdi) ++ sub $VEC_SIZE, %r8 ++ jb L(CopyVecSizeCase3) ++ VMOVU %YMM7, (VEC_SIZE * 3)(%rdi) ++# ifdef USE_AS_STPCPY ++ lea (VEC_SIZE * 4)(%rdi), %rax ++# endif ++# ifdef USE_AS_STRCAT ++ movb $0, (VEC_SIZE * 4)(%rdi) ++# endif ++ ret ++ ++ .p2align 4 ++L(UnalignedFourVecSizeLeaveCase2): ++ xor %ecx, %ecx ++ vpcmpb $0, %YMM4, %YMMZERO, %k1 ++ kmovd %k1, %edx ++ add $(VEC_SIZE * 3), %r8 ++ jle L(CopyVecSizeCase2OrCase3) ++ test %edx, %edx ++# ifndef USE_AS_STRCAT ++ jnz L(CopyVecSizeUnalignedVec4) ++# else ++ jnz L(CopyVecSize) ++# endif ++ vpcmpb $0, %YMM5, %YMMZERO, %k2 ++ kmovd %k2, %edx ++ VMOVU %YMM4, (%rdi) ++ add $VEC_SIZE, 
%rcx ++ sub $VEC_SIZE, %r8 ++ jbe L(CopyVecSizeCase2OrCase3) ++ test %edx, %edx ++# ifndef USE_AS_STRCAT ++ jnz L(CopyVecSizeUnalignedVec5) ++# else ++ jnz L(CopyVecSize) ++# endif ++ ++ vpcmpb $0, %YMM6, %YMMZERO, %k3 ++ kmovd %k3, %edx ++ VMOVU %YMM5, VEC_SIZE(%rdi) ++ add $VEC_SIZE, %rcx ++ sub $VEC_SIZE, %r8 ++ jbe L(CopyVecSizeCase2OrCase3) ++ test %edx, %edx ++# ifndef USE_AS_STRCAT ++ jnz L(CopyVecSizeUnalignedVec6) ++# else ++ jnz L(CopyVecSize) ++# endif ++ ++ vpcmpb $0, %YMM7, %YMMZERO, %k4 ++ kmovd %k4, %edx ++ VMOVU %YMM6, (VEC_SIZE * 2)(%rdi) ++ lea VEC_SIZE(%rdi, %rcx), %rdi ++ lea VEC_SIZE(%rsi, %rcx), %rsi ++ bsf %edx, %edx ++ cmp %r8d, %edx ++ jb L(CopyVecSizeExit) ++L(StrncpyExit): ++ cmp $65, %r8d ++ je L(StrncpyExit65) ++ cmp $33, %r8d ++ jae L(StrncpyExit33_64) ++ cmp $17, %r8d ++ jae L(StrncpyExit17_32) ++ cmp $9, %r8d ++ jae L(StrncpyExit9_16) ++ cmp $5, %r8d ++ jae L(StrncpyExit5_8) ++ cmp $3, %r8d ++ jae L(StrncpyExit3_4) ++ cmp $1, %r8d ++ ja L(StrncpyExit2) ++ je L(StrncpyExit1) ++# ifdef USE_AS_STPCPY ++ mov %rdi, %rax ++# endif ++# ifdef USE_AS_STRCAT ++ movb $0, (%rdi) ++# endif ++ ret ++ ++ .p2align 4 ++L(ExitZero): ++# ifndef USE_AS_STRCAT ++ mov %rdi, %rax ++# endif ++ ret ++ ++# endif ++ ++# ifndef USE_AS_STRCAT ++END (STRCPY) ++# else ++END (STRCAT) ++# endif ++#endif +diff --git a/sysdeps/x86_64/multiarch/strlen-avx2-rtm.S b/sysdeps/x86_64/multiarch/strlen-avx2-rtm.S +new file mode 100644 +index 0000000000..75b4b7612c +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strlen-avx2-rtm.S +@@ -0,0 +1,12 @@ ++#ifndef STRLEN ++# define STRLEN __strlen_avx2_rtm ++#endif ++ ++#define ZERO_UPPER_VEC_REGISTERS_RETURN \ ++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST ++ ++#define VZEROUPPER_RETURN jmp L(return_vzeroupper) ++ ++#define SECTION(p) p##.avx.rtm ++ ++#include "strlen-avx2.S" +diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S +index 73421ec1b2..45e08e64d6 100644 +--- 
a/sysdeps/x86_64/multiarch/strlen-avx2.S ++++ b/sysdeps/x86_64/multiarch/strlen-avx2.S +@@ -27,370 +27,531 @@ + # ifdef USE_AS_WCSLEN + # define VPCMPEQ vpcmpeqd + # define VPMINU vpminud ++# define CHAR_SIZE 4 + # else + # define VPCMPEQ vpcmpeqb + # define VPMINU vpminub ++# define CHAR_SIZE 1 + # endif + + # ifndef VZEROUPPER + # define VZEROUPPER vzeroupper + # endif + ++# ifndef SECTION ++# define SECTION(p) p##.avx ++# endif ++ + # define VEC_SIZE 32 ++# define PAGE_SIZE 4096 ++# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) + +- .section .text.avx,"ax",@progbits ++ .section SECTION(.text),"ax",@progbits + ENTRY (STRLEN) + # ifdef USE_AS_STRNLEN +- /* Check for zero length. */ ++ /* Check zero length. */ ++# ifdef __ILP32__ ++ /* Clear upper bits. */ ++ and %RSI_LP, %RSI_LP ++# else + test %RSI_LP, %RSI_LP +- jz L(zero) +-# ifdef USE_AS_WCSLEN +- shl $2, %RSI_LP +-# elif defined __ILP32__ +- /* Clear the upper 32 bits. */ +- movl %esi, %esi + # endif ++ jz L(zero) ++ /* Store max len in R8_LP before adjusting if using WCSLEN. */ + mov %RSI_LP, %R8_LP + # endif +- movl %edi, %ecx ++ movl %edi, %eax + movq %rdi, %rdx + vpxor %xmm0, %xmm0, %xmm0 +- ++ /* Clear high bits from edi. Only keeping bits relevant to page ++ cross check. */ ++ andl $(PAGE_SIZE - 1), %eax + /* Check if we may cross page boundary with one vector load. */ +- andl $(2 * VEC_SIZE - 1), %ecx +- cmpl $VEC_SIZE, %ecx +- ja L(cros_page_boundary) ++ cmpl $(PAGE_SIZE - VEC_SIZE), %eax ++ ja L(cross_page_boundary) + + /* Check the first VEC_SIZE bytes. */ +- VPCMPEQ (%rdi), %ymm0, %ymm1 ++ VPCMPEQ (%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax +- testl %eax, %eax +- + # ifdef USE_AS_STRNLEN +- jnz L(first_vec_x0_check) +- /* Adjust length and check the end of data. */ +- subq $VEC_SIZE, %rsi +- jbe L(max) +-# else +- jnz L(first_vec_x0) ++ /* If length < VEC_SIZE handle special. */ ++ cmpq $CHAR_PER_VEC, %rsi ++ jbe L(first_vec_x0) + # endif +- +- /* Align data for aligned loads in the loop. 
*/ +- addq $VEC_SIZE, %rdi +- andl $(VEC_SIZE - 1), %ecx +- andq $-VEC_SIZE, %rdi ++ /* If empty continue to aligned_more. Otherwise return bit ++ position of first match. */ ++ testl %eax, %eax ++ jz L(aligned_more) ++ tzcntl %eax, %eax ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get wchar_t count. */ ++ shrl $2, %eax ++# endif ++ VZEROUPPER_RETURN + + # ifdef USE_AS_STRNLEN +- /* Adjust length. */ +- addq %rcx, %rsi ++L(zero): ++ xorl %eax, %eax ++ ret + +- subq $(VEC_SIZE * 4), %rsi +- jbe L(last_4x_vec_or_less) ++ .p2align 4 ++L(first_vec_x0): ++ /* Set bit for max len so that tzcnt will return min of max len ++ and position of first match. */ ++# ifdef USE_AS_WCSLEN ++ /* NB: Multiply length by 4 to get byte count. */ ++ sall $2, %esi ++# endif ++ btsq %rsi, %rax ++ tzcntl %eax, %eax ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get wchar_t count. */ ++ shrl $2, %eax ++# endif ++ VZEROUPPER_RETURN + # endif +- jmp L(more_4x_vec) + + .p2align 4 +-L(cros_page_boundary): +- andl $(VEC_SIZE - 1), %ecx +- andq $-VEC_SIZE, %rdi +- VPCMPEQ (%rdi), %ymm0, %ymm1 +- vpmovmskb %ymm1, %eax +- /* Remove the leading bytes. */ +- sarl %cl, %eax +- testl %eax, %eax +- jz L(aligned_more) ++L(first_vec_x1): + tzcntl %eax, %eax ++ /* Safe to use 32 bit instructions as these are only called for ++ size = [1, 159]. */ + # ifdef USE_AS_STRNLEN +- /* Check the end of data. */ +- cmpq %rax, %rsi +- jbe L(max) ++ /* Use ecx which was computed earlier to compute correct value. ++ */ ++# ifdef USE_AS_WCSLEN ++ leal -(VEC_SIZE * 4 + 1)(%rax, %rcx, 4), %eax ++# else ++ subl $(VEC_SIZE * 4 + 1), %ecx ++ addl %ecx, %eax ++# endif ++# else ++ subl %edx, %edi ++ incl %edi ++ addl %edi, %eax + # endif +- addq %rdi, %rax +- addq %rcx, %rax +- subq %rdx, %rax + # ifdef USE_AS_WCSLEN +- shrq $2, %rax ++ /* NB: Divide bytes by 4 to get wchar_t count. 
*/ ++ shrl $2, %eax + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 +-L(aligned_more): ++L(first_vec_x2): ++ tzcntl %eax, %eax ++ /* Safe to use 32 bit instructions as these are only called for ++ size = [1, 159]. */ + # ifdef USE_AS_STRNLEN +- /* "rcx" is less than VEC_SIZE. Calculate "rdx + rcx - VEC_SIZE" +- with "rdx - (VEC_SIZE - rcx)" instead of "(rdx + rcx) - VEC_SIZE" +- to void possible addition overflow. */ +- negq %rcx +- addq $VEC_SIZE, %rcx +- +- /* Check the end of data. */ +- subq %rcx, %rsi +- jbe L(max) ++ /* Use ecx which was computed earlier to compute correct value. ++ */ ++# ifdef USE_AS_WCSLEN ++ leal -(VEC_SIZE * 3 + 1)(%rax, %rcx, 4), %eax ++# else ++ subl $(VEC_SIZE * 3 + 1), %ecx ++ addl %ecx, %eax ++# endif ++# else ++ subl %edx, %edi ++ addl $(VEC_SIZE + 1), %edi ++ addl %edi, %eax + # endif ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get wchar_t count. */ ++ shrl $2, %eax ++# endif ++ VZEROUPPER_RETURN + +- addq $VEC_SIZE, %rdi ++ .p2align 4 ++L(first_vec_x3): ++ tzcntl %eax, %eax ++ /* Safe to use 32 bit instructions as these are only called for ++ size = [1, 159]. */ ++# ifdef USE_AS_STRNLEN ++ /* Use ecx which was computed earlier to compute correct value. ++ */ ++# ifdef USE_AS_WCSLEN ++ leal -(VEC_SIZE * 2 + 1)(%rax, %rcx, 4), %eax ++# else ++ subl $(VEC_SIZE * 2 + 1), %ecx ++ addl %ecx, %eax ++# endif ++# else ++ subl %edx, %edi ++ addl $(VEC_SIZE * 2 + 1), %edi ++ addl %edi, %eax ++# endif ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get wchar_t count. */ ++ shrl $2, %eax ++# endif ++ VZEROUPPER_RETURN + ++ .p2align 4 ++L(first_vec_x4): ++ tzcntl %eax, %eax ++ /* Safe to use 32 bit instructions as these are only called for ++ size = [1, 159]. */ + # ifdef USE_AS_STRNLEN +- subq $(VEC_SIZE * 4), %rsi +- jbe L(last_4x_vec_or_less) ++ /* Use ecx which was computed earlier to compute correct value. 
++ */ ++# ifdef USE_AS_WCSLEN ++ leal -(VEC_SIZE * 1 + 1)(%rax, %rcx, 4), %eax ++# else ++ subl $(VEC_SIZE + 1), %ecx ++ addl %ecx, %eax ++# endif ++# else ++ subl %edx, %edi ++ addl $(VEC_SIZE * 3 + 1), %edi ++ addl %edi, %eax + # endif ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get wchar_t count. */ ++ shrl $2, %eax ++# endif ++ VZEROUPPER_RETURN + +-L(more_4x_vec): ++ .p2align 5 ++L(aligned_more): ++ /* Align data to VEC_SIZE - 1. This is the same number of ++ instructions as using andq with -VEC_SIZE but saves 4 bytes of ++ code on the x4 check. */ ++ orq $(VEC_SIZE - 1), %rdi ++L(cross_page_continue): + /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time + since data is only aligned to VEC_SIZE. */ +- VPCMPEQ (%rdi), %ymm0, %ymm1 +- vpmovmskb %ymm1, %eax +- testl %eax, %eax +- jnz L(first_vec_x0) +- +- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 ++# ifdef USE_AS_STRNLEN ++ /* + 1 because rdi is aligned to VEC_SIZE - 1. + CHAR_SIZE ++ because it simplies the logic in last_4x_vec_or_less. */ ++ leaq (VEC_SIZE * 4 + CHAR_SIZE + 1)(%rdi), %rcx ++ subq %rdx, %rcx ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get the wchar_t count. */ ++ sarl $2, %ecx ++# endif ++# endif ++ /* Load first VEC regardless. */ ++ VPCMPEQ 1(%rdi), %ymm0, %ymm1 ++# ifdef USE_AS_STRNLEN ++ /* Adjust length. If near end handle specially. */ ++ subq %rcx, %rsi ++ jb L(last_4x_vec_or_less) ++# endif + vpmovmskb %ymm1, %eax + testl %eax, %eax + jnz L(first_vec_x1) + +- VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1 ++ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + testl %eax, %eax + jnz L(first_vec_x2) + +- VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1 ++ VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + testl %eax, %eax + jnz L(first_vec_x3) + +- addq $(VEC_SIZE * 4), %rdi +- +-# ifdef USE_AS_STRNLEN +- subq $(VEC_SIZE * 4), %rsi +- jbe L(last_4x_vec_or_less) +-# endif +- +- /* Align data to 4 * VEC_SIZE. 
*/ +- movq %rdi, %rcx +- andl $(4 * VEC_SIZE - 1), %ecx +- andq $-(4 * VEC_SIZE), %rdi ++ VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1 ++ vpmovmskb %ymm1, %eax ++ testl %eax, %eax ++ jnz L(first_vec_x4) + ++ /* Align data to VEC_SIZE * 4 - 1. */ + # ifdef USE_AS_STRNLEN +- /* Adjust length. */ ++ /* Before adjusting length check if at last VEC_SIZE * 4. */ ++ cmpq $(CHAR_PER_VEC * 4 - 1), %rsi ++ jbe L(last_4x_vec_or_less_load) ++ incq %rdi ++ movl %edi, %ecx ++ orq $(VEC_SIZE * 4 - 1), %rdi ++ andl $(VEC_SIZE * 4 - 1), %ecx ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get the wchar_t count. */ ++ sarl $2, %ecx ++# endif ++ /* Readjust length. */ + addq %rcx, %rsi ++# else ++ incq %rdi ++ orq $(VEC_SIZE * 4 - 1), %rdi + # endif +- ++ /* Compare 4 * VEC at a time forward. */ + .p2align 4 + L(loop_4x_vec): +- /* Compare 4 * VEC at a time forward. */ +- vmovdqa (%rdi), %ymm1 +- vmovdqa VEC_SIZE(%rdi), %ymm2 +- vmovdqa (VEC_SIZE * 2)(%rdi), %ymm3 +- vmovdqa (VEC_SIZE * 3)(%rdi), %ymm4 +- VPMINU %ymm1, %ymm2, %ymm5 +- VPMINU %ymm3, %ymm4, %ymm6 +- VPMINU %ymm5, %ymm6, %ymm5 +- ++# ifdef USE_AS_STRNLEN ++ /* Break if at end of length. */ ++ subq $(CHAR_PER_VEC * 4), %rsi ++ jb L(last_4x_vec_or_less_cmpeq) ++# endif ++ /* Save some code size by microfusing VPMINU with the load. ++ Since the matches in ymm2/ymm4 can only be returned if there ++ where no matches in ymm1/ymm3 respectively there is no issue ++ with overlap. 
*/ ++ vmovdqa 1(%rdi), %ymm1 ++ VPMINU (VEC_SIZE + 1)(%rdi), %ymm1, %ymm2 ++ vmovdqa (VEC_SIZE * 2 + 1)(%rdi), %ymm3 ++ VPMINU (VEC_SIZE * 3 + 1)(%rdi), %ymm3, %ymm4 ++ ++ VPMINU %ymm2, %ymm4, %ymm5 + VPCMPEQ %ymm5, %ymm0, %ymm5 +- vpmovmskb %ymm5, %eax +- testl %eax, %eax +- jnz L(4x_vec_end) ++ vpmovmskb %ymm5, %ecx + +- addq $(VEC_SIZE * 4), %rdi ++ subq $-(VEC_SIZE * 4), %rdi ++ testl %ecx, %ecx ++ jz L(loop_4x_vec) + +-# ifndef USE_AS_STRNLEN +- jmp L(loop_4x_vec) +-# else +- subq $(VEC_SIZE * 4), %rsi +- ja L(loop_4x_vec) + +-L(last_4x_vec_or_less): +- /* Less than 4 * VEC and aligned to VEC_SIZE. */ +- addl $(VEC_SIZE * 2), %esi +- jle L(last_2x_vec) +- +- VPCMPEQ (%rdi), %ymm0, %ymm1 +- vpmovmskb %ymm1, %eax +- testl %eax, %eax +- jnz L(first_vec_x0) +- +- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 ++ VPCMPEQ %ymm1, %ymm0, %ymm1 + vpmovmskb %ymm1, %eax ++ subq %rdx, %rdi + testl %eax, %eax +- jnz L(first_vec_x1) ++ jnz L(last_vec_return_x0) + +- VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1 +- vpmovmskb %ymm1, %eax ++ VPCMPEQ %ymm2, %ymm0, %ymm2 ++ vpmovmskb %ymm2, %eax + testl %eax, %eax ++ jnz L(last_vec_return_x1) + +- jnz L(first_vec_x2_check) +- subl $VEC_SIZE, %esi +- jle L(max) ++ /* Combine last 2 VEC. */ ++ VPCMPEQ %ymm3, %ymm0, %ymm3 ++ vpmovmskb %ymm3, %eax ++ /* rcx has combined result from all 4 VEC. It will only be used ++ if the first 3 other VEC all did not contain a match. */ ++ salq $32, %rcx ++ orq %rcx, %rax ++ tzcntq %rax, %rax ++ subq $(VEC_SIZE * 2 - 1), %rdi ++ addq %rdi, %rax ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get wchar_t count. */ ++ shrq $2, %rax ++# endif ++ VZEROUPPER_RETURN + +- VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1 +- vpmovmskb %ymm1, %eax +- testl %eax, %eax + +- jnz L(first_vec_x3_check) +- movq %r8, %rax ++# ifdef USE_AS_STRNLEN ++ .p2align 4 ++L(last_4x_vec_or_less_load): ++ /* Depending on entry adjust rdi / prepare first VEC in ymm1. 
++ */ ++ subq $-(VEC_SIZE * 4), %rdi ++L(last_4x_vec_or_less_cmpeq): ++ VPCMPEQ 1(%rdi), %ymm0, %ymm1 ++L(last_4x_vec_or_less): + # ifdef USE_AS_WCSLEN +- shrq $2, %rax ++ /* NB: Multiply length by 4 to get byte count. */ ++ sall $2, %esi + # endif +- VZEROUPPER +- ret +- +- .p2align 4 +-L(last_2x_vec): +- addl $(VEC_SIZE * 2), %esi +- VPCMPEQ (%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax ++ /* If remaining length > VEC_SIZE * 2. This works if esi is off ++ by VEC_SIZE * 4. */ ++ testl $(VEC_SIZE * 2), %esi ++ jnz L(last_4x_vec) ++ ++ /* length may have been negative or positive by an offset of ++ VEC_SIZE * 4 depending on where this was called from. This fixes ++ that. */ ++ andl $(VEC_SIZE * 4 - 1), %esi + testl %eax, %eax ++ jnz L(last_vec_x1_check) + +- jnz L(first_vec_x0_check) + subl $VEC_SIZE, %esi +- jle L(max) ++ jb L(max) + +- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 ++ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax +- testl %eax, %eax +- jnz L(first_vec_x1_check) +- movq %r8, %rax +-# ifdef USE_AS_WCSLEN +- shrq $2, %rax +-# endif +- VZEROUPPER +- ret +- +- .p2align 4 +-L(first_vec_x0_check): + tzcntl %eax, %eax + /* Check the end of data. */ +- cmpq %rax, %rsi +- jbe L(max) ++ cmpl %eax, %esi ++ jb L(max) ++ subq %rdx, %rdi ++ addl $(VEC_SIZE + 1), %eax + addq %rdi, %rax +- subq %rdx, %rax + # ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get wchar_t count. */ + shrq $2, %rax + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN ++# endif + + .p2align 4 +-L(first_vec_x1_check): ++L(last_vec_return_x0): + tzcntl %eax, %eax +- /* Check the end of data. */ +- cmpq %rax, %rsi +- jbe L(max) +- addq $VEC_SIZE, %rax ++ subq $(VEC_SIZE * 4 - 1), %rdi + addq %rdi, %rax +- subq %rdx, %rax +-# ifdef USE_AS_WCSLEN ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get wchar_t count. 
*/ + shrq $2, %rax +-# endif +- VZEROUPPER +- ret ++# endif ++ VZEROUPPER_RETURN + + .p2align 4 +-L(first_vec_x2_check): ++L(last_vec_return_x1): + tzcntl %eax, %eax +- /* Check the end of data. */ +- cmpq %rax, %rsi +- jbe L(max) +- addq $(VEC_SIZE * 2), %rax ++ subq $(VEC_SIZE * 3 - 1), %rdi + addq %rdi, %rax +- subq %rdx, %rax +-# ifdef USE_AS_WCSLEN ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get wchar_t count. */ + shrq $2, %rax +-# endif +- VZEROUPPER +- ret ++# endif ++ VZEROUPPER_RETURN + ++# ifdef USE_AS_STRNLEN + .p2align 4 +-L(first_vec_x3_check): ++L(last_vec_x1_check): ++ + tzcntl %eax, %eax + /* Check the end of data. */ +- cmpq %rax, %rsi +- jbe L(max) +- addq $(VEC_SIZE * 3), %rax ++ cmpl %eax, %esi ++ jb L(max) ++ subq %rdx, %rdi ++ incl %eax + addq %rdi, %rax +- subq %rdx, %rax + # ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get wchar_t count. */ + shrq $2, %rax + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + +- .p2align 4 + L(max): + movq %r8, %rax ++ VZEROUPPER_RETURN ++ ++ .p2align 4 ++L(last_4x_vec): ++ /* Test first 2x VEC normally. */ ++ testl %eax, %eax ++ jnz L(last_vec_x1) ++ ++ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 ++ vpmovmskb %ymm1, %eax ++ testl %eax, %eax ++ jnz L(last_vec_x2) ++ ++ /* Normalize length. */ ++ andl $(VEC_SIZE * 4 - 1), %esi ++ VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1 ++ vpmovmskb %ymm1, %eax ++ testl %eax, %eax ++ jnz L(last_vec_x3) ++ ++ subl $(VEC_SIZE * 3), %esi ++ jb L(max) ++ ++ VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1 ++ vpmovmskb %ymm1, %eax ++ tzcntl %eax, %eax ++ /* Check the end of data. */ ++ cmpl %eax, %esi ++ jb L(max) ++ subq %rdx, %rdi ++ addl $(VEC_SIZE * 3 + 1), %eax ++ addq %rdi, %rax + # ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get wchar_t count. 
*/ + shrq $2, %rax + # endif +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + +- .p2align 4 +-L(zero): +- xorl %eax, %eax +- ret +-# endif + + .p2align 4 +-L(first_vec_x0): ++L(last_vec_x1): ++ /* essentially duplicates of first_vec_x1 but use 64 bit ++ instructions. */ + tzcntl %eax, %eax ++ subq %rdx, %rdi ++ incl %eax + addq %rdi, %rax +- subq %rdx, %rax +-# ifdef USE_AS_WCSLEN ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get wchar_t count. */ + shrq $2, %rax +-# endif +- VZEROUPPER +- ret ++# endif ++ VZEROUPPER_RETURN + + .p2align 4 +-L(first_vec_x1): ++L(last_vec_x2): ++ /* essentially duplicates of first_vec_x1 but use 64 bit ++ instructions. */ + tzcntl %eax, %eax +- addq $VEC_SIZE, %rax ++ subq %rdx, %rdi ++ addl $(VEC_SIZE + 1), %eax + addq %rdi, %rax +- subq %rdx, %rax +-# ifdef USE_AS_WCSLEN ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get wchar_t count. */ + shrq $2, %rax +-# endif +- VZEROUPPER +- ret ++# endif ++ VZEROUPPER_RETURN + + .p2align 4 +-L(first_vec_x2): ++L(last_vec_x3): + tzcntl %eax, %eax +- addq $(VEC_SIZE * 2), %rax ++ subl $(VEC_SIZE * 2), %esi ++ /* Check the end of data. */ ++ cmpl %eax, %esi ++ jb L(max_end) ++ subq %rdx, %rdi ++ addl $(VEC_SIZE * 2 + 1), %eax + addq %rdi, %rax +- subq %rdx, %rax +-# ifdef USE_AS_WCSLEN ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get wchar_t count. */ + shrq $2, %rax ++# endif ++ VZEROUPPER_RETURN ++L(max_end): ++ movq %r8, %rax ++ VZEROUPPER_RETURN + # endif +- VZEROUPPER +- ret + ++ /* Cold case for crossing page with first load. */ + .p2align 4 +-L(4x_vec_end): +- VPCMPEQ %ymm1, %ymm0, %ymm1 ++L(cross_page_boundary): ++ /* Align data to VEC_SIZE - 1. */ ++ orq $(VEC_SIZE - 1), %rdi ++ VPCMPEQ -(VEC_SIZE - 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax ++ /* Remove the leading bytes. sarxl only uses bits [5:0] of COUNT ++ so no need to manually mod rdx. 
*/ ++ sarxl %edx, %eax, %eax ++# ifdef USE_AS_STRNLEN + testl %eax, %eax +- jnz L(first_vec_x0) +- VPCMPEQ %ymm2, %ymm0, %ymm2 +- vpmovmskb %ymm2, %eax +- testl %eax, %eax +- jnz L(first_vec_x1) +- VPCMPEQ %ymm3, %ymm0, %ymm3 +- vpmovmskb %ymm3, %eax ++ jnz L(cross_page_less_vec) ++ leaq 1(%rdi), %rcx ++ subq %rdx, %rcx ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get wchar_t count. */ ++ shrl $2, %ecx ++# endif ++ /* Check length. */ ++ cmpq %rsi, %rcx ++ jb L(cross_page_continue) ++ movq %r8, %rax ++# else + testl %eax, %eax +- jnz L(first_vec_x2) +- VPCMPEQ %ymm4, %ymm0, %ymm4 +- vpmovmskb %ymm4, %eax +-L(first_vec_x3): ++ jz L(cross_page_continue) + tzcntl %eax, %eax +- addq $(VEC_SIZE * 3), %rax +- addq %rdi, %rax +- subq %rdx, %rax +-# ifdef USE_AS_WCSLEN +- shrq $2, %rax ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide length by 4 to get wchar_t count. */ ++ shrl $2, %eax ++# endif ++# endif ++L(return_vzeroupper): ++ ZERO_UPPER_VEC_REGISTERS_RETURN ++ ++# ifdef USE_AS_STRNLEN ++ .p2align 4 ++L(cross_page_less_vec): ++ tzcntl %eax, %eax ++# ifdef USE_AS_WCSLEN ++ /* NB: Multiply length by 4 to get byte count. */ ++ sall $2, %esi ++# endif ++ cmpq %rax, %rsi ++ cmovb %esi, %eax ++# ifdef USE_AS_WCSLEN ++ shrl $2, %eax ++# endif ++ VZEROUPPER_RETURN + # endif +- VZEROUPPER +- ret + + END (STRLEN) + #endif +diff --git a/sysdeps/x86_64/multiarch/strlen-evex.S b/sysdeps/x86_64/multiarch/strlen-evex.S +new file mode 100644 +index 0000000000..4bf6874b82 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strlen-evex.S +@@ -0,0 +1,489 @@ ++/* strlen/strnlen/wcslen/wcsnlen optimized with 256-bit EVEX instructions. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#if IS_IN (libc) ++ ++# include <sysdep.h> ++ ++# ifndef STRLEN ++# define STRLEN __strlen_evex ++# endif ++ ++# define VMOVA vmovdqa64 ++ ++# ifdef USE_AS_WCSLEN ++# define VPCMP vpcmpd ++# define VPMINU vpminud ++# define SHIFT_REG ecx ++# define CHAR_SIZE 4 ++# else ++# define VPCMP vpcmpb ++# define VPMINU vpminub ++# define SHIFT_REG edx ++# define CHAR_SIZE 1 ++# endif ++ ++# define XMMZERO xmm16 ++# define YMMZERO ymm16 ++# define YMM1 ymm17 ++# define YMM2 ymm18 ++# define YMM3 ymm19 ++# define YMM4 ymm20 ++# define YMM5 ymm21 ++# define YMM6 ymm22 ++ ++# define VEC_SIZE 32 ++# define PAGE_SIZE 4096 ++# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) ++ ++ .section .text.evex,"ax",@progbits ++ENTRY (STRLEN) ++# ifdef USE_AS_STRNLEN ++ /* Check zero length. */ ++ test %RSI_LP, %RSI_LP ++ jz L(zero) ++# ifdef __ILP32__ ++ /* Clear the upper 32 bits. */ ++ movl %esi, %esi ++# endif ++ mov %RSI_LP, %R8_LP ++# endif ++ movl %edi, %eax ++ vpxorq %XMMZERO, %XMMZERO, %XMMZERO ++ /* Clear high bits from edi. Only keeping bits relevant to page ++ cross check. */ ++ andl $(PAGE_SIZE - 1), %eax ++ /* Check if we may cross page boundary with one vector load. */ ++ cmpl $(PAGE_SIZE - VEC_SIZE), %eax ++ ja L(cross_page_boundary) ++ ++ /* Check the first VEC_SIZE bytes. 
Each bit in K0 represents a ++ null byte. */ ++ VPCMP $0, (%rdi), %YMMZERO, %k0 ++ kmovd %k0, %eax ++# ifdef USE_AS_STRNLEN ++ /* If length < CHAR_PER_VEC handle special. */ ++ cmpq $CHAR_PER_VEC, %rsi ++ jbe L(first_vec_x0) ++# endif ++ testl %eax, %eax ++ jz L(aligned_more) ++ tzcntl %eax, %eax ++ ret ++# ifdef USE_AS_STRNLEN ++L(zero): ++ xorl %eax, %eax ++ ret ++ ++ .p2align 4 ++L(first_vec_x0): ++ /* Set bit for max len so that tzcnt will return min of max len ++ and position of first match. */ ++ btsq %rsi, %rax ++ tzcntl %eax, %eax ++ ret ++# endif ++ ++ .p2align 4 ++L(first_vec_x1): ++ tzcntl %eax, %eax ++ /* Safe to use 32 bit instructions as these are only called for ++ size = [1, 159]. */ ++# ifdef USE_AS_STRNLEN ++ /* Use ecx which was computed earlier to compute correct value. ++ */ ++ leal -(CHAR_PER_VEC * 4 + 1)(%rcx, %rax), %eax ++# else ++ subl %edx, %edi ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get the wchar_t count. */ ++ sarl $2, %edi ++# endif ++ leal CHAR_PER_VEC(%rdi, %rax), %eax ++# endif ++ ret ++ ++ .p2align 4 ++L(first_vec_x2): ++ tzcntl %eax, %eax ++ /* Safe to use 32 bit instructions as these are only called for ++ size = [1, 159]. */ ++# ifdef USE_AS_STRNLEN ++ /* Use ecx which was computed earlier to compute correct value. ++ */ ++ leal -(CHAR_PER_VEC * 3 + 1)(%rcx, %rax), %eax ++# else ++ subl %edx, %edi ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get the wchar_t count. */ ++ sarl $2, %edi ++# endif ++ leal (CHAR_PER_VEC * 2)(%rdi, %rax), %eax ++# endif ++ ret ++ ++ .p2align 4 ++L(first_vec_x3): ++ tzcntl %eax, %eax ++ /* Safe to use 32 bit instructions as these are only called for ++ size = [1, 159]. */ ++# ifdef USE_AS_STRNLEN ++ /* Use ecx which was computed earlier to compute correct value. ++ */ ++ leal -(CHAR_PER_VEC * 2 + 1)(%rcx, %rax), %eax ++# else ++ subl %edx, %edi ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get the wchar_t count. 
*/ ++ sarl $2, %edi ++# endif ++ leal (CHAR_PER_VEC * 3)(%rdi, %rax), %eax ++# endif ++ ret ++ ++ .p2align 4 ++L(first_vec_x4): ++ tzcntl %eax, %eax ++ /* Safe to use 32 bit instructions as these are only called for ++ size = [1, 159]. */ ++# ifdef USE_AS_STRNLEN ++ /* Use ecx which was computed earlier to compute correct value. ++ */ ++ leal -(CHAR_PER_VEC + 1)(%rcx, %rax), %eax ++# else ++ subl %edx, %edi ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get the wchar_t count. */ ++ sarl $2, %edi ++# endif ++ leal (CHAR_PER_VEC * 4)(%rdi, %rax), %eax ++# endif ++ ret ++ ++ .p2align 5 ++L(aligned_more): ++ movq %rdi, %rdx ++ /* Align data to VEC_SIZE. */ ++ andq $-(VEC_SIZE), %rdi ++L(cross_page_continue): ++ /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time ++ since data is only aligned to VEC_SIZE. */ ++# ifdef USE_AS_STRNLEN ++ /* + CHAR_SIZE because it simplies the logic in ++ last_4x_vec_or_less. */ ++ leaq (VEC_SIZE * 5 + CHAR_SIZE)(%rdi), %rcx ++ subq %rdx, %rcx ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get the wchar_t count. */ ++ sarl $2, %ecx ++# endif ++# endif ++ /* Load first VEC regardless. */ ++ VPCMP $0, VEC_SIZE(%rdi), %YMMZERO, %k0 ++# ifdef USE_AS_STRNLEN ++ /* Adjust length. If near end handle specially. */ ++ subq %rcx, %rsi ++ jb L(last_4x_vec_or_less) ++# endif ++ kmovd %k0, %eax ++ testl %eax, %eax ++ jnz L(first_vec_x1) ++ ++ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0 ++ kmovd %k0, %eax ++ test %eax, %eax ++ jnz L(first_vec_x2) ++ ++ VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMZERO, %k0 ++ kmovd %k0, %eax ++ testl %eax, %eax ++ jnz L(first_vec_x3) ++ ++ VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMZERO, %k0 ++ kmovd %k0, %eax ++ testl %eax, %eax ++ jnz L(first_vec_x4) ++ ++ addq $VEC_SIZE, %rdi ++# ifdef USE_AS_STRNLEN ++ /* Check if at last VEC_SIZE * 4 length. 
*/ ++ cmpq $(CHAR_PER_VEC * 4 - 1), %rsi ++ jbe L(last_4x_vec_or_less_load) ++ movl %edi, %ecx ++ andl $(VEC_SIZE * 4 - 1), %ecx ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get the wchar_t count. */ ++ sarl $2, %ecx ++# endif ++ /* Readjust length. */ ++ addq %rcx, %rsi ++# endif ++ /* Align data to VEC_SIZE * 4. */ ++ andq $-(VEC_SIZE * 4), %rdi ++ ++ /* Compare 4 * VEC at a time forward. */ ++ .p2align 4 ++L(loop_4x_vec): ++ /* Load first VEC regardless. */ ++ VMOVA (VEC_SIZE * 4)(%rdi), %YMM1 ++# ifdef USE_AS_STRNLEN ++ /* Break if at end of length. */ ++ subq $(CHAR_PER_VEC * 4), %rsi ++ jb L(last_4x_vec_or_less_cmpeq) ++# endif ++ /* Save some code size by microfusing VPMINU with the load. Since ++ the matches in ymm2/ymm4 can only be returned if there where no ++ matches in ymm1/ymm3 respectively there is no issue with overlap. ++ */ ++ VPMINU (VEC_SIZE * 5)(%rdi), %YMM1, %YMM2 ++ VMOVA (VEC_SIZE * 6)(%rdi), %YMM3 ++ VPMINU (VEC_SIZE * 7)(%rdi), %YMM3, %YMM4 ++ ++ VPCMP $0, %YMM2, %YMMZERO, %k0 ++ VPCMP $0, %YMM4, %YMMZERO, %k1 ++ subq $-(VEC_SIZE * 4), %rdi ++ kortestd %k0, %k1 ++ jz L(loop_4x_vec) ++ ++ /* Check if end was in first half. */ ++ kmovd %k0, %eax ++ subq %rdx, %rdi ++# ifdef USE_AS_WCSLEN ++ shrq $2, %rdi ++# endif ++ testl %eax, %eax ++ jz L(second_vec_return) ++ ++ VPCMP $0, %YMM1, %YMMZERO, %k2 ++ kmovd %k2, %edx ++ /* Combine VEC1 matches (edx) with VEC2 matches (eax). */ ++# ifdef USE_AS_WCSLEN ++ sall $CHAR_PER_VEC, %eax ++ orl %edx, %eax ++ tzcntl %eax, %eax ++# else ++ salq $CHAR_PER_VEC, %rax ++ orq %rdx, %rax ++ tzcntq %rax, %rax ++# endif ++ addq %rdi, %rax ++ ret ++ ++ ++# ifdef USE_AS_STRNLEN ++ ++L(last_4x_vec_or_less_load): ++ /* Depending on entry adjust rdi / prepare first VEC in YMM1. */ ++ VMOVA (VEC_SIZE * 4)(%rdi), %YMM1 ++L(last_4x_vec_or_less_cmpeq): ++ VPCMP $0, %YMM1, %YMMZERO, %k0 ++ addq $(VEC_SIZE * 3), %rdi ++L(last_4x_vec_or_less): ++ kmovd %k0, %eax ++ /* If remaining length > VEC_SIZE * 2. 
This works if esi is off by ++ VEC_SIZE * 4. */ ++ testl $(CHAR_PER_VEC * 2), %esi ++ jnz L(last_4x_vec) ++ ++ /* length may have been negative or positive by an offset of ++ CHAR_PER_VEC * 4 depending on where this was called from. This ++ fixes that. */ ++ andl $(CHAR_PER_VEC * 4 - 1), %esi ++ testl %eax, %eax ++ jnz L(last_vec_x1_check) ++ ++ /* Check the end of data. */ ++ subl $CHAR_PER_VEC, %esi ++ jb L(max) ++ ++ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0 ++ kmovd %k0, %eax ++ tzcntl %eax, %eax ++ /* Check the end of data. */ ++ cmpl %eax, %esi ++ jb L(max) ++ ++ subq %rdx, %rdi ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get the wchar_t count. */ ++ sarq $2, %rdi ++# endif ++ leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax ++ ret ++L(max): ++ movq %r8, %rax ++ ret ++# endif ++ ++ /* Placed here in strnlen so that the jcc L(last_4x_vec_or_less) ++ in the 4x VEC loop can use 2 byte encoding. */ ++ .p2align 4 ++L(second_vec_return): ++ VPCMP $0, %YMM3, %YMMZERO, %k0 ++ /* Combine YMM3 matches (k0) with YMM4 matches (k1). */ ++# ifdef USE_AS_WCSLEN ++ kunpckbw %k0, %k1, %k0 ++ kmovd %k0, %eax ++ tzcntl %eax, %eax ++# else ++ kunpckdq %k0, %k1, %k0 ++ kmovq %k0, %rax ++ tzcntq %rax, %rax ++# endif ++ leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax ++ ret ++ ++ ++# ifdef USE_AS_STRNLEN ++L(last_vec_x1_check): ++ tzcntl %eax, %eax ++ /* Check the end of data. */ ++ cmpl %eax, %esi ++ jb L(max) ++ subq %rdx, %rdi ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get the wchar_t count. */ ++ sarq $2, %rdi ++# endif ++ leaq (CHAR_PER_VEC)(%rdi, %rax), %rax ++ ret ++ ++ .p2align 4 ++L(last_4x_vec): ++ /* Test first 2x VEC normally. */ ++ testl %eax, %eax ++ jnz L(last_vec_x1) ++ ++ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0 ++ kmovd %k0, %eax ++ testl %eax, %eax ++ jnz L(last_vec_x2) ++ ++ /* Normalize length. 
*/ ++ andl $(CHAR_PER_VEC * 4 - 1), %esi ++ VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMZERO, %k0 ++ kmovd %k0, %eax ++ testl %eax, %eax ++ jnz L(last_vec_x3) ++ ++ /* Check the end of data. */ ++ subl $(CHAR_PER_VEC * 3), %esi ++ jb L(max) ++ ++ VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMZERO, %k0 ++ kmovd %k0, %eax ++ tzcntl %eax, %eax ++ /* Check the end of data. */ ++ cmpl %eax, %esi ++ jb L(max_end) ++ ++ subq %rdx, %rdi ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get the wchar_t count. */ ++ sarq $2, %rdi ++# endif ++ leaq (CHAR_PER_VEC * 4)(%rdi, %rax), %rax ++ ret ++ ++ .p2align 4 ++L(last_vec_x1): ++ tzcntl %eax, %eax ++ subq %rdx, %rdi ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get the wchar_t count. */ ++ sarq $2, %rdi ++# endif ++ leaq (CHAR_PER_VEC)(%rdi, %rax), %rax ++ ret ++ ++ .p2align 4 ++L(last_vec_x2): ++ tzcntl %eax, %eax ++ subq %rdx, %rdi ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get the wchar_t count. */ ++ sarq $2, %rdi ++# endif ++ leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax ++ ret ++ ++ .p2align 4 ++L(last_vec_x3): ++ tzcntl %eax, %eax ++ subl $(CHAR_PER_VEC * 2), %esi ++ /* Check the end of data. */ ++ cmpl %eax, %esi ++ jb L(max_end) ++ subq %rdx, %rdi ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide bytes by 4 to get the wchar_t count. */ ++ sarq $2, %rdi ++# endif ++ leaq (CHAR_PER_VEC * 3)(%rdi, %rax), %rax ++ ret ++L(max_end): ++ movq %r8, %rax ++ ret ++# endif ++ ++ /* Cold case for crossing page with first load. */ ++ .p2align 4 ++L(cross_page_boundary): ++ movq %rdi, %rdx ++ /* Align data to VEC_SIZE. */ ++ andq $-VEC_SIZE, %rdi ++ VPCMP $0, (%rdi), %YMMZERO, %k0 ++ kmovd %k0, %eax ++ /* Remove the leading bytes. */ ++# ifdef USE_AS_WCSLEN ++ /* NB: Divide shift count by 4 since each bit in K0 represent 4 ++ bytes. */ ++ movl %edx, %ecx ++ shrl $2, %ecx ++ andl $(CHAR_PER_VEC - 1), %ecx ++# endif ++ /* SHIFT_REG is ecx for USE_AS_WCSLEN and edx otherwise. 
*/ ++ sarxl %SHIFT_REG, %eax, %eax ++ testl %eax, %eax ++# ifndef USE_AS_STRNLEN ++ jz L(cross_page_continue) ++ tzcntl %eax, %eax ++ ret ++# else ++ jnz L(cross_page_less_vec) ++# ifndef USE_AS_WCSLEN ++ movl %edx, %ecx ++ andl $(CHAR_PER_VEC - 1), %ecx ++# endif ++ movl $CHAR_PER_VEC, %eax ++ subl %ecx, %eax ++ /* Check the end of data. */ ++ cmpq %rax, %rsi ++ ja L(cross_page_continue) ++ movl %esi, %eax ++ ret ++L(cross_page_less_vec): ++ tzcntl %eax, %eax ++ /* Select min of length and position of first null. */ ++ cmpq %rax, %rsi ++ cmovb %esi, %eax ++ ret ++# endif ++ ++END (STRLEN) ++#endif +diff --git a/sysdeps/x86_64/multiarch/strlen-sse2.S b/sysdeps/x86_64/multiarch/strlen-sse2.S +index 055fbbc690..812af73c13 100644 +--- a/sysdeps/x86_64/multiarch/strlen-sse2.S ++++ b/sysdeps/x86_64/multiarch/strlen-sse2.S +@@ -20,4 +20,4 @@ + # define strlen __strlen_sse2 + #endif + +-#include "../strlen.S" ++#include "strlen-vec.S" +diff --git a/sysdeps/x86_64/multiarch/strlen-vec.S b/sysdeps/x86_64/multiarch/strlen-vec.S +new file mode 100644 +index 0000000000..439e486a43 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strlen-vec.S +@@ -0,0 +1,270 @@ ++/* SSE2 version of strlen and SSE4.1 version of wcslen. ++ Copyright (C) 2012-2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++ ++#ifdef AS_WCSLEN ++# define PMINU pminud ++# define PCMPEQ pcmpeqd ++# define SHIFT_RETURN shrq $2, %rax ++#else ++# define PMINU pminub ++# define PCMPEQ pcmpeqb ++# define SHIFT_RETURN ++#endif ++ ++/* Long lived register in strlen(s), strnlen(s, n) are: ++ ++ %xmm3 - zero ++ %rdi - s ++ %r10 (s+n) & (~(64-1)) ++ %r11 s+n ++*/ ++ ++ ++.text ++ENTRY(strlen) ++ ++/* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx. */ ++#define FIND_ZERO \ ++ PCMPEQ (%rax), %xmm0; \ ++ PCMPEQ 16(%rax), %xmm1; \ ++ PCMPEQ 32(%rax), %xmm2; \ ++ PCMPEQ 48(%rax), %xmm3; \ ++ pmovmskb %xmm0, %esi; \ ++ pmovmskb %xmm1, %edx; \ ++ pmovmskb %xmm2, %r8d; \ ++ pmovmskb %xmm3, %ecx; \ ++ salq $16, %rdx; \ ++ salq $16, %rcx; \ ++ orq %rsi, %rdx; \ ++ orq %r8, %rcx; \ ++ salq $32, %rcx; \ ++ orq %rcx, %rdx; ++ ++#ifdef AS_STRNLEN ++/* Do not read anything when n==0. */ ++ test %RSI_LP, %RSI_LP ++ jne L(n_nonzero) ++ xor %rax, %rax ++ ret ++L(n_nonzero): ++# ifdef AS_WCSLEN ++/* Check for overflow from maxlen * sizeof(wchar_t). If it would ++ overflow the only way this program doesn't have undefined behavior ++ is if there is a null terminator in valid memory so wcslen will ++ suffice. */ ++ mov %RSI_LP, %R10_LP ++ sar $62, %R10_LP ++ test %R10_LP, %R10_LP ++ jnz __wcslen_sse4_1 ++ sal $2, %RSI_LP ++# endif ++ ++ ++/* Initialize long lived registers. */ ++ ++ add %RDI_LP, %RSI_LP ++# ifdef AS_WCSLEN ++/* Check for overflow again from s + maxlen * sizeof(wchar_t). */ ++ jbe __wcslen_sse4_1 ++# endif ++ mov %RSI_LP, %R10_LP ++ and $-64, %R10_LP ++ mov %RSI_LP, %R11_LP ++#endif ++ ++ pxor %xmm0, %xmm0 ++ pxor %xmm1, %xmm1 ++ pxor %xmm2, %xmm2 ++ pxor %xmm3, %xmm3 ++ movq %rdi, %rax ++ movq %rdi, %rcx ++ andq $4095, %rcx ++/* Offsets 4032-4047 will be aligned into 4032 thus fit into page. 
*/ ++ cmpq $4047, %rcx ++/* We cannot unify this branching as it would be ~6 cycles slower. */ ++ ja L(cross_page) ++ ++#ifdef AS_STRNLEN ++/* Test if end is among first 64 bytes. */ ++# define STRNLEN_PROLOG \ ++ mov %r11, %rsi; \ ++ subq %rax, %rsi; \ ++ andq $-64, %rax; \ ++ testq $-64, %rsi; \ ++ je L(strnlen_ret) ++#else ++# define STRNLEN_PROLOG andq $-64, %rax; ++#endif ++ ++/* Ignore bits in mask that come before start of string. */ ++#define PROLOG(lab) \ ++ movq %rdi, %rcx; \ ++ xorq %rax, %rcx; \ ++ STRNLEN_PROLOG; \ ++ sarq %cl, %rdx; \ ++ test %rdx, %rdx; \ ++ je L(lab); \ ++ bsfq %rdx, %rax; \ ++ SHIFT_RETURN; \ ++ ret ++ ++#ifdef AS_STRNLEN ++ andq $-16, %rax ++ FIND_ZERO ++#else ++ /* Test first 16 bytes unaligned. */ ++ movdqu (%rax), %xmm4 ++ PCMPEQ %xmm0, %xmm4 ++ pmovmskb %xmm4, %edx ++ test %edx, %edx ++ je L(next48_bytes) ++ bsf %edx, %eax /* If eax is zeroed 16bit bsf can be used. */ ++ SHIFT_RETURN ++ ret ++ ++L(next48_bytes): ++/* Same as FIND_ZERO except we do not check first 16 bytes. */ ++ andq $-16, %rax ++ PCMPEQ 16(%rax), %xmm1 ++ PCMPEQ 32(%rax), %xmm2 ++ PCMPEQ 48(%rax), %xmm3 ++ pmovmskb %xmm1, %edx ++ pmovmskb %xmm2, %r8d ++ pmovmskb %xmm3, %ecx ++ salq $16, %rdx ++ salq $16, %rcx ++ orq %r8, %rcx ++ salq $32, %rcx ++ orq %rcx, %rdx ++#endif ++ ++ /* When no zero byte is found xmm1-3 are zero so we do not have to ++ zero them. */ ++ PROLOG(loop) ++ ++ .p2align 4 ++L(cross_page): ++ andq $-64, %rax ++ FIND_ZERO ++ PROLOG(loop_init) ++ ++#ifdef AS_STRNLEN ++/* We must do this check to correctly handle strnlen (s, -1). 
*/ ++L(strnlen_ret): ++ bts %rsi, %rdx ++ sarq %cl, %rdx ++ test %rdx, %rdx ++ je L(loop_init) ++ bsfq %rdx, %rax ++ SHIFT_RETURN ++ ret ++#endif ++ .p2align 4 ++L(loop_init): ++ pxor %xmm1, %xmm1 ++ pxor %xmm2, %xmm2 ++ pxor %xmm3, %xmm3 ++#ifdef AS_STRNLEN ++ .p2align 4 ++L(loop): ++ ++ addq $64, %rax ++ cmpq %rax, %r10 ++ je L(exit_end) ++ ++ movdqa (%rax), %xmm0 ++ PMINU 16(%rax), %xmm0 ++ PMINU 32(%rax), %xmm0 ++ PMINU 48(%rax), %xmm0 ++ PCMPEQ %xmm3, %xmm0 ++ pmovmskb %xmm0, %edx ++ testl %edx, %edx ++ jne L(exit) ++ jmp L(loop) ++ ++ .p2align 4 ++L(exit_end): ++ cmp %rax, %r11 ++ je L(first) /* Do not read when end is at page boundary. */ ++ pxor %xmm0, %xmm0 ++ FIND_ZERO ++ ++L(first): ++ bts %r11, %rdx ++ bsfq %rdx, %rdx ++ addq %rdx, %rax ++ subq %rdi, %rax ++ SHIFT_RETURN ++ ret ++ ++ .p2align 4 ++L(exit): ++ pxor %xmm0, %xmm0 ++ FIND_ZERO ++ ++ bsfq %rdx, %rdx ++ addq %rdx, %rax ++ subq %rdi, %rax ++ SHIFT_RETURN ++ ret ++ ++#else ++ ++ /* Main loop. Unrolled twice to improve L2 cache performance on core2. 
*/ ++ .p2align 4 ++L(loop): ++ ++ movdqa 64(%rax), %xmm0 ++ PMINU 80(%rax), %xmm0 ++ PMINU 96(%rax), %xmm0 ++ PMINU 112(%rax), %xmm0 ++ PCMPEQ %xmm3, %xmm0 ++ pmovmskb %xmm0, %edx ++ testl %edx, %edx ++ jne L(exit64) ++ ++ subq $-128, %rax ++ ++ movdqa (%rax), %xmm0 ++ PMINU 16(%rax), %xmm0 ++ PMINU 32(%rax), %xmm0 ++ PMINU 48(%rax), %xmm0 ++ PCMPEQ %xmm3, %xmm0 ++ pmovmskb %xmm0, %edx ++ testl %edx, %edx ++ jne L(exit0) ++ jmp L(loop) ++ ++ .p2align 4 ++L(exit64): ++ addq $64, %rax ++L(exit0): ++ pxor %xmm0, %xmm0 ++ FIND_ZERO ++ ++ bsfq %rdx, %rdx ++ addq %rdx, %rax ++ subq %rdi, %rax ++ SHIFT_RETURN ++ ret ++ ++#endif ++ ++END(strlen) +diff --git a/sysdeps/x86_64/multiarch/strncat-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncat-avx2-rtm.S +new file mode 100644 +index 0000000000..0dcea18dbb +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strncat-avx2-rtm.S +@@ -0,0 +1,3 @@ ++#define USE_AS_STRNCAT ++#define STRCAT __strncat_avx2_rtm ++#include "strcat-avx2-rtm.S" +diff --git a/sysdeps/x86_64/multiarch/strncat-evex.S b/sysdeps/x86_64/multiarch/strncat-evex.S +new file mode 100644 +index 0000000000..8884f02371 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strncat-evex.S +@@ -0,0 +1,3 @@ ++#define USE_AS_STRNCAT ++#define STRCAT __strncat_evex ++#include "strcat-evex.S" +diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S +new file mode 100644 +index 0000000000..37d1224bb9 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S +@@ -0,0 +1,3 @@ ++#define STRCMP __strncmp_avx2_rtm ++#define USE_AS_STRNCMP 1 ++#include "strcmp-avx2-rtm.S" +diff --git a/sysdeps/x86_64/multiarch/strncmp-evex.S b/sysdeps/x86_64/multiarch/strncmp-evex.S +new file mode 100644 +index 0000000000..a1d53e8c9f +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strncmp-evex.S +@@ -0,0 +1,3 @@ ++#define STRCMP __strncmp_evex ++#define USE_AS_STRNCMP 1 ++#include "strcmp-evex.S" +diff --git a/sysdeps/x86_64/multiarch/strncmp.c 
b/sysdeps/x86_64/multiarch/strncmp.c +index 3c94b3ffd9..7accba2b7c 100644 +--- a/sysdeps/x86_64/multiarch/strncmp.c ++++ b/sysdeps/x86_64/multiarch/strncmp.c +@@ -30,16 +30,29 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + + static inline void * + IFUNC_SELECTOR (void) + { + const struct cpu_features* cpu_features = __get_cpu_features (); + +- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) +- && CPU_FEATURE_USABLE_P (cpu_features, AVX2) ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) + && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) +- return OPTIMIZE (avx2); ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) ++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) ++ && CPU_FEATURE_USABLE_P (cpu_features, BMI2) ++ && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP)) ++ return OPTIMIZE (evex); ++ ++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) ++ return OPTIMIZE (avx2_rtm); ++ ++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) ++ return OPTIMIZE (avx2); ++ } + + if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) + && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2)) +diff --git a/sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S +new file mode 100644 +index 0000000000..79e7083299 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S +@@ -0,0 +1,3 @@ ++#define USE_AS_STRNCPY ++#define STRCPY __strncpy_avx2_rtm ++#include "strcpy-avx2-rtm.S" +diff --git a/sysdeps/x86_64/multiarch/strncpy-evex.S b/sysdeps/x86_64/multiarch/strncpy-evex.S +new file mode 100644 +index 0000000000..40e391f0da +--- 
/dev/null ++++ b/sysdeps/x86_64/multiarch/strncpy-evex.S +@@ -0,0 +1,3 @@ ++#define USE_AS_STRNCPY ++#define STRCPY __strncpy_evex ++#include "strcpy-evex.S" +diff --git a/sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S b/sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S +new file mode 100644 +index 0000000000..04f1626a5c +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S +@@ -0,0 +1,4 @@ ++#define STRLEN __strnlen_avx2_rtm ++#define USE_AS_STRNLEN 1 ++ ++#include "strlen-avx2-rtm.S" +diff --git a/sysdeps/x86_64/multiarch/strnlen-evex.S b/sysdeps/x86_64/multiarch/strnlen-evex.S +new file mode 100644 +index 0000000000..722022f303 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strnlen-evex.S +@@ -0,0 +1,4 @@ ++#define STRLEN __strnlen_evex ++#define USE_AS_STRNLEN 1 ++ ++#include "strlen-evex.S" +diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S +new file mode 100644 +index 0000000000..5def14ec1c +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S +@@ -0,0 +1,12 @@ ++#ifndef STRRCHR ++# define STRRCHR __strrchr_avx2_rtm ++#endif ++ ++#define ZERO_UPPER_VEC_REGISTERS_RETURN \ ++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST ++ ++#define VZEROUPPER_RETURN jmp L(return_vzeroupper) ++ ++#define SECTION(p) p##.avx.rtm ++ ++#include "strrchr-avx2.S" +diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2.S b/sysdeps/x86_64/multiarch/strrchr-avx2.S +index 146bdd51d0..ad91fab991 100644 +--- a/sysdeps/x86_64/multiarch/strrchr-avx2.S ++++ b/sysdeps/x86_64/multiarch/strrchr-avx2.S +@@ -36,9 +36,13 @@ + # define VZEROUPPER vzeroupper + # endif + ++# ifndef SECTION ++# define SECTION(p) p##.avx ++# endif ++ + # define VEC_SIZE 32 + +- .section .text.avx,"ax",@progbits ++ .section SECTION(.text),"ax",@progbits + ENTRY (STRRCHR) + movd %esi, %xmm4 + movl %edi, %ecx +@@ -166,8 +170,8 @@ L(return_value): + # endif + bsrl %eax, %eax + leaq -VEC_SIZE(%rdi, %rax), %rax +- VZEROUPPER +- ret ++L(return_vzeroupper): ++ 
ZERO_UPPER_VEC_REGISTERS_RETURN + + .p2align 4 + L(match): +@@ -198,8 +202,7 @@ L(find_nul): + jz L(return_value) + bsrl %eax, %eax + leaq -VEC_SIZE(%rdi, %rax), %rax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(char_and_nul): +@@ -222,14 +225,12 @@ L(char_and_nul_in_first_vec): + jz L(return_null) + bsrl %eax, %eax + leaq -VEC_SIZE(%rdi, %rax), %rax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + .p2align 4 + L(return_null): + xorl %eax, %eax +- VZEROUPPER +- ret ++ VZEROUPPER_RETURN + + END (STRRCHR) + #endif +diff --git a/sysdeps/x86_64/multiarch/strrchr-evex.S b/sysdeps/x86_64/multiarch/strrchr-evex.S +new file mode 100644 +index 0000000000..f920b5a584 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strrchr-evex.S +@@ -0,0 +1,265 @@ ++/* strrchr/wcsrchr optimized with 256-bit EVEX instructions. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#if IS_IN (libc) ++ ++# include <sysdep.h> ++ ++# ifndef STRRCHR ++# define STRRCHR __strrchr_evex ++# endif ++ ++# define VMOVU vmovdqu64 ++# define VMOVA vmovdqa64 ++ ++# ifdef USE_AS_WCSRCHR ++# define VPBROADCAST vpbroadcastd ++# define VPCMP vpcmpd ++# define SHIFT_REG r8d ++# else ++# define VPBROADCAST vpbroadcastb ++# define VPCMP vpcmpb ++# define SHIFT_REG ecx ++# endif ++ ++# define XMMZERO xmm16 ++# define YMMZERO ymm16 ++# define YMMMATCH ymm17 ++# define YMM1 ymm18 ++ ++# define VEC_SIZE 32 ++ ++ .section .text.evex,"ax",@progbits ++ENTRY (STRRCHR) ++ movl %edi, %ecx ++ /* Broadcast CHAR to YMMMATCH. */ ++ VPBROADCAST %esi, %YMMMATCH ++ ++ vpxorq %XMMZERO, %XMMZERO, %XMMZERO ++ ++ /* Check if we may cross page boundary with one vector load. */ ++ andl $(2 * VEC_SIZE - 1), %ecx ++ cmpl $VEC_SIZE, %ecx ++ ja L(cros_page_boundary) ++ ++ VMOVU (%rdi), %YMM1 ++ ++ /* Each bit in K0 represents a null byte in YMM1. */ ++ VPCMP $0, %YMMZERO, %YMM1, %k0 ++ /* Each bit in K1 represents a CHAR in YMM1. */ ++ VPCMP $0, %YMMMATCH, %YMM1, %k1 ++ kmovd %k0, %ecx ++ kmovd %k1, %eax ++ ++ addq $VEC_SIZE, %rdi ++ ++ testl %eax, %eax ++ jnz L(first_vec) ++ ++ testl %ecx, %ecx ++ jnz L(return_null) ++ ++ andq $-VEC_SIZE, %rdi ++ xorl %edx, %edx ++ jmp L(aligned_loop) ++ ++ .p2align 4 ++L(first_vec): ++ /* Check if there is a null byte. */ ++ testl %ecx, %ecx ++ jnz L(char_and_nul_in_first_vec) ++ ++ /* Remember the match and keep searching. */ ++ movl %eax, %edx ++ movq %rdi, %rsi ++ andq $-VEC_SIZE, %rdi ++ jmp L(aligned_loop) ++ ++ .p2align 4 ++L(cros_page_boundary): ++ andl $(VEC_SIZE - 1), %ecx ++ andq $-VEC_SIZE, %rdi ++ ++# ifdef USE_AS_WCSRCHR ++ /* NB: Divide shift count by 4 since each bit in K1 represent 4 ++ bytes. */ ++ movl %ecx, %SHIFT_REG ++ sarl $2, %SHIFT_REG ++# endif ++ ++ VMOVA (%rdi), %YMM1 ++ ++ /* Each bit in K0 represents a null byte in YMM1. */ ++ VPCMP $0, %YMMZERO, %YMM1, %k0 ++ /* Each bit in K1 represents a CHAR in YMM1. 
*/ ++ VPCMP $0, %YMMMATCH, %YMM1, %k1 ++ kmovd %k0, %edx ++ kmovd %k1, %eax ++ ++ shrxl %SHIFT_REG, %edx, %edx ++ shrxl %SHIFT_REG, %eax, %eax ++ addq $VEC_SIZE, %rdi ++ ++ /* Check if there is a CHAR. */ ++ testl %eax, %eax ++ jnz L(found_char) ++ ++ testl %edx, %edx ++ jnz L(return_null) ++ ++ jmp L(aligned_loop) ++ ++ .p2align 4 ++L(found_char): ++ testl %edx, %edx ++ jnz L(char_and_nul) ++ ++ /* Remember the match and keep searching. */ ++ movl %eax, %edx ++ leaq (%rdi, %rcx), %rsi ++ ++ .p2align 4 ++L(aligned_loop): ++ VMOVA (%rdi), %YMM1 ++ addq $VEC_SIZE, %rdi ++ ++ /* Each bit in K0 represents a null byte in YMM1. */ ++ VPCMP $0, %YMMZERO, %YMM1, %k0 ++ /* Each bit in K1 represents a CHAR in YMM1. */ ++ VPCMP $0, %YMMMATCH, %YMM1, %k1 ++ kmovd %k0, %ecx ++ kmovd %k1, %eax ++ orl %eax, %ecx ++ jnz L(char_nor_null) ++ ++ VMOVA (%rdi), %YMM1 ++ add $VEC_SIZE, %rdi ++ ++ /* Each bit in K0 represents a null byte in YMM1. */ ++ VPCMP $0, %YMMZERO, %YMM1, %k0 ++ /* Each bit in K1 represents a CHAR in YMM1. */ ++ VPCMP $0, %YMMMATCH, %YMM1, %k1 ++ kmovd %k0, %ecx ++ kmovd %k1, %eax ++ orl %eax, %ecx ++ jnz L(char_nor_null) ++ ++ VMOVA (%rdi), %YMM1 ++ addq $VEC_SIZE, %rdi ++ ++ /* Each bit in K0 represents a null byte in YMM1. */ ++ VPCMP $0, %YMMZERO, %YMM1, %k0 ++ /* Each bit in K1 represents a CHAR in YMM1. */ ++ VPCMP $0, %YMMMATCH, %YMM1, %k1 ++ kmovd %k0, %ecx ++ kmovd %k1, %eax ++ orl %eax, %ecx ++ jnz L(char_nor_null) ++ ++ VMOVA (%rdi), %YMM1 ++ addq $VEC_SIZE, %rdi ++ ++ /* Each bit in K0 represents a null byte in YMM1. */ ++ VPCMP $0, %YMMZERO, %YMM1, %k0 ++ /* Each bit in K1 represents a CHAR in YMM1. */ ++ VPCMP $0, %YMMMATCH, %YMM1, %k1 ++ kmovd %k0, %ecx ++ kmovd %k1, %eax ++ orl %eax, %ecx ++ jz L(aligned_loop) ++ ++ .p2align 4 ++L(char_nor_null): ++ /* Find a CHAR or a null byte in a loop. 
*/ ++ testl %eax, %eax ++ jnz L(match) ++L(return_value): ++ testl %edx, %edx ++ jz L(return_null) ++ movl %edx, %eax ++ movq %rsi, %rdi ++ bsrl %eax, %eax ++# ifdef USE_AS_WCSRCHR ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ ++ leaq -VEC_SIZE(%rdi, %rax, 4), %rax ++# else ++ leaq -VEC_SIZE(%rdi, %rax), %rax ++# endif ++ ret ++ ++ .p2align 4 ++L(match): ++ /* Find a CHAR. Check if there is a null byte. */ ++ kmovd %k0, %ecx ++ testl %ecx, %ecx ++ jnz L(find_nul) ++ ++ /* Remember the match and keep searching. */ ++ movl %eax, %edx ++ movq %rdi, %rsi ++ jmp L(aligned_loop) ++ ++ .p2align 4 ++L(find_nul): ++ /* Mask out any matching bits after the null byte. */ ++ movl %ecx, %r8d ++ subl $1, %r8d ++ xorl %ecx, %r8d ++ andl %r8d, %eax ++ testl %eax, %eax ++ /* If there is no CHAR here, return the remembered one. */ ++ jz L(return_value) ++ bsrl %eax, %eax ++# ifdef USE_AS_WCSRCHR ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ ++ leaq -VEC_SIZE(%rdi, %rax, 4), %rax ++# else ++ leaq -VEC_SIZE(%rdi, %rax), %rax ++# endif ++ ret ++ ++ .p2align 4 ++L(char_and_nul): ++ /* Find both a CHAR and a null byte. */ ++ addq %rcx, %rdi ++ movl %edx, %ecx ++L(char_and_nul_in_first_vec): ++ /* Mask out any matching bits after the null byte. */ ++ movl %ecx, %r8d ++ subl $1, %r8d ++ xorl %ecx, %r8d ++ andl %r8d, %eax ++ testl %eax, %eax ++ /* Return null pointer if the null byte comes first. */ ++ jz L(return_null) ++ bsrl %eax, %eax ++# ifdef USE_AS_WCSRCHR ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. 
*/ ++ leaq -VEC_SIZE(%rdi, %rax, 4), %rax ++# else ++ leaq -VEC_SIZE(%rdi, %rax), %rax ++# endif ++ ret ++ ++ .p2align 4 ++L(return_null): ++ xorl %eax, %eax ++ ret ++ ++END (STRRCHR) ++#endif +diff --git a/sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S +new file mode 100644 +index 0000000000..d49dbbf0b4 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S +@@ -0,0 +1,3 @@ ++#define STRCHR __wcschr_avx2_rtm ++#define USE_AS_WCSCHR 1 ++#include "strchr-avx2-rtm.S" +diff --git a/sysdeps/x86_64/multiarch/wcschr-evex.S b/sysdeps/x86_64/multiarch/wcschr-evex.S +new file mode 100644 +index 0000000000..7cb8f1e41a +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/wcschr-evex.S +@@ -0,0 +1,3 @@ ++#define STRCHR __wcschr_evex ++#define USE_AS_WCSCHR 1 ++#include "strchr-evex.S" +diff --git a/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S +new file mode 100644 +index 0000000000..d6ca2b8064 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S +@@ -0,0 +1,4 @@ ++#define STRCMP __wcscmp_avx2_rtm ++#define USE_AS_WCSCMP 1 ++ ++#include "strcmp-avx2-rtm.S" +diff --git a/sysdeps/x86_64/multiarch/wcscmp-evex.S b/sysdeps/x86_64/multiarch/wcscmp-evex.S +new file mode 100644 +index 0000000000..42e73e51eb +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/wcscmp-evex.S +@@ -0,0 +1,4 @@ ++#define STRCMP __wcscmp_evex ++#define USE_AS_WCSCMP 1 ++ ++#include "strcmp-evex.S" +diff --git a/sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S +new file mode 100644 +index 0000000000..35658d7365 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S +@@ -0,0 +1,4 @@ ++#define STRLEN __wcslen_avx2_rtm ++#define USE_AS_WCSLEN 1 ++ ++#include "strlen-avx2-rtm.S" +diff --git a/sysdeps/x86_64/multiarch/wcslen-evex.S b/sysdeps/x86_64/multiarch/wcslen-evex.S +new file mode 100644 +index 0000000000..bdafa83bd5 +--- /dev/null ++++ 
b/sysdeps/x86_64/multiarch/wcslen-evex.S +@@ -0,0 +1,4 @@ ++#define STRLEN __wcslen_evex ++#define USE_AS_WCSLEN 1 ++ ++#include "strlen-evex.S" +diff --git a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S +new file mode 100644 +index 0000000000..7e62621afc +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S +@@ -0,0 +1,4 @@ ++#define AS_WCSLEN ++#define strlen __wcslen_sse4_1 ++ ++#include "strlen-vec.S" +diff --git a/sysdeps/x86_64/multiarch/wcslen.c b/sysdeps/x86_64/multiarch/wcslen.c +index bb97438c7f..26b5fdffd6 100644 +--- a/sysdeps/x86_64/multiarch/wcslen.c ++++ b/sysdeps/x86_64/multiarch/wcslen.c +@@ -24,7 +24,7 @@ + # undef __wcslen + + # define SYMBOL_NAME wcslen +-# include "ifunc-avx2.h" ++# include "ifunc-wcslen.h" + + libc_ifunc_redirected (__redirect_wcslen, __wcslen, IFUNC_SELECTOR ()); + weak_alias (__wcslen, wcslen); +diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S +new file mode 100644 +index 0000000000..4e88c70cc6 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S +@@ -0,0 +1,5 @@ ++#define STRCMP __wcsncmp_avx2_rtm ++#define USE_AS_STRNCMP 1 ++#define USE_AS_WCSCMP 1 ++ ++#include "strcmp-avx2-rtm.S" +diff --git a/sysdeps/x86_64/multiarch/wcsncmp-evex.S b/sysdeps/x86_64/multiarch/wcsncmp-evex.S +new file mode 100644 +index 0000000000..8a8e310713 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/wcsncmp-evex.S +@@ -0,0 +1,5 @@ ++#define STRCMP __wcsncmp_evex ++#define USE_AS_STRNCMP 1 ++#define USE_AS_WCSCMP 1 ++ ++#include "strcmp-evex.S" +diff --git a/sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S +new file mode 100644 +index 0000000000..7437ebee2d +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S +@@ -0,0 +1,5 @@ ++#define STRLEN __wcsnlen_avx2_rtm ++#define USE_AS_WCSLEN 1 ++#define USE_AS_STRNLEN 1 ++ ++#include "strlen-avx2-rtm.S" +diff --git 
a/sysdeps/x86_64/multiarch/wcsnlen-evex.S b/sysdeps/x86_64/multiarch/wcsnlen-evex.S +new file mode 100644 +index 0000000000..24773bb4e2 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/wcsnlen-evex.S +@@ -0,0 +1,5 @@ ++#define STRLEN __wcsnlen_evex ++#define USE_AS_WCSLEN 1 ++#define USE_AS_STRNLEN 1 ++ ++#include "strlen-evex.S" +diff --git a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S +index a8cab0cb00..5fa51fe07c 100644 +--- a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S ++++ b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S +@@ -2,4 +2,4 @@ + #define AS_STRNLEN + #define strlen __wcsnlen_sse4_1 + +-#include "../strlen.S" ++#include "strlen-vec.S" +diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c +index 52e7e5d4f3..f15c1b328b 100644 +--- a/sysdeps/x86_64/multiarch/wcsnlen.c ++++ b/sysdeps/x86_64/multiarch/wcsnlen.c +@@ -24,27 +24,7 @@ + # undef __wcsnlen + + # define SYMBOL_NAME wcsnlen +-# include <init-arch.h> +- +-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; +-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; +- +-static inline void * +-IFUNC_SELECTOR (void) +-{ +- const struct cpu_features* cpu_features = __get_cpu_features (); +- +- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) +- && CPU_FEATURE_USABLE_P (cpu_features, AVX2) +- && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) +- return OPTIMIZE (avx2); +- +- if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1)) +- return OPTIMIZE (sse4_1); +- +- return OPTIMIZE (sse2); +-} ++# include "ifunc-wcslen.h" + + libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ()); + weak_alias (__wcsnlen, wcsnlen); +diff --git a/sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S +new file mode 100644 +index 0000000000..9bf760833f +--- /dev/null ++++ 
b/sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S +@@ -0,0 +1,3 @@ ++#define STRRCHR __wcsrchr_avx2_rtm ++#define USE_AS_WCSRCHR 1 ++#include "strrchr-avx2-rtm.S" +diff --git a/sysdeps/x86_64/multiarch/wcsrchr-evex.S b/sysdeps/x86_64/multiarch/wcsrchr-evex.S +new file mode 100644 +index 0000000000..c64602f7dc +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/wcsrchr-evex.S +@@ -0,0 +1,3 @@ ++#define STRRCHR __wcsrchr_evex ++#define USE_AS_WCSRCHR 1 ++#include "strrchr-evex.S" +diff --git a/sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S +new file mode 100644 +index 0000000000..58ed21db01 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S +@@ -0,0 +1,4 @@ ++#define MEMCHR __wmemchr_avx2_rtm ++#define USE_AS_WMEMCHR 1 ++ ++#include "memchr-avx2-rtm.S" +diff --git a/sysdeps/x86_64/multiarch/wmemchr-evex.S b/sysdeps/x86_64/multiarch/wmemchr-evex.S +new file mode 100644 +index 0000000000..06cd0f9f5a +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/wmemchr-evex.S +@@ -0,0 +1,4 @@ ++#define MEMCHR __wmemchr_evex ++#define USE_AS_WMEMCHR 1 ++ ++#include "memchr-evex.S" +diff --git a/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S b/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S +new file mode 100644 +index 0000000000..31104d1215 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S +@@ -0,0 +1,4 @@ ++#define MEMCMP __wmemcmp_avx2_movbe_rtm ++#define USE_AS_WMEMCMP 1 ++ ++#include "memcmp-avx2-movbe-rtm.S" +diff --git a/sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S +new file mode 100644 +index 0000000000..4726d74aa1 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S +@@ -0,0 +1,4 @@ ++#define MEMCMP __wmemcmp_evex_movbe ++#define USE_AS_WMEMCMP 1 ++ ++#include "memcmp-evex-movbe.S" +diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S +index 2e226d0d55..8422c15cc8 100644 +--- a/sysdeps/x86_64/strlen.S ++++ 
b/sysdeps/x86_64/strlen.S +@@ -1,5 +1,5 @@ +-/* SSE2 version of strlen/wcslen. +- Copyright (C) 2012-2020 Free Software Foundation, Inc. ++/* SSE2 version of strlen. ++ Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or +@@ -16,243 +16,6 @@ + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +-#include <sysdep.h> ++#include "multiarch/strlen-vec.S" + +-#ifdef AS_WCSLEN +-# define PMINU pminud +-# define PCMPEQ pcmpeqd +-# define SHIFT_RETURN shrq $2, %rax +-#else +-# define PMINU pminub +-# define PCMPEQ pcmpeqb +-# define SHIFT_RETURN +-#endif +- +-/* Long lived register in strlen(s), strnlen(s, n) are: +- +- %xmm3 - zero +- %rdi - s +- %r10 (s+n) & (~(64-1)) +- %r11 s+n +-*/ +- +- +-.text +-ENTRY(strlen) +- +-/* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx. */ +-#define FIND_ZERO \ +- PCMPEQ (%rax), %xmm0; \ +- PCMPEQ 16(%rax), %xmm1; \ +- PCMPEQ 32(%rax), %xmm2; \ +- PCMPEQ 48(%rax), %xmm3; \ +- pmovmskb %xmm0, %esi; \ +- pmovmskb %xmm1, %edx; \ +- pmovmskb %xmm2, %r8d; \ +- pmovmskb %xmm3, %ecx; \ +- salq $16, %rdx; \ +- salq $16, %rcx; \ +- orq %rsi, %rdx; \ +- orq %r8, %rcx; \ +- salq $32, %rcx; \ +- orq %rcx, %rdx; +- +-#ifdef AS_STRNLEN +-/* Do not read anything when n==0. */ +- test %RSI_LP, %RSI_LP +- jne L(n_nonzero) +- xor %rax, %rax +- ret +-L(n_nonzero): +-# ifdef AS_WCSLEN +- shl $2, %RSI_LP +-# endif +- +-/* Initialize long lived registers. */ +- +- add %RDI_LP, %RSI_LP +- mov %RSI_LP, %R10_LP +- and $-64, %R10_LP +- mov %RSI_LP, %R11_LP +-#endif +- +- pxor %xmm0, %xmm0 +- pxor %xmm1, %xmm1 +- pxor %xmm2, %xmm2 +- pxor %xmm3, %xmm3 +- movq %rdi, %rax +- movq %rdi, %rcx +- andq $4095, %rcx +-/* Offsets 4032-4047 will be aligned into 4032 thus fit into page. */ +- cmpq $4047, %rcx +-/* We cannot unify this branching as it would be ~6 cycles slower. 
*/ +- ja L(cross_page) +- +-#ifdef AS_STRNLEN +-/* Test if end is among first 64 bytes. */ +-# define STRNLEN_PROLOG \ +- mov %r11, %rsi; \ +- subq %rax, %rsi; \ +- andq $-64, %rax; \ +- testq $-64, %rsi; \ +- je L(strnlen_ret) +-#else +-# define STRNLEN_PROLOG andq $-64, %rax; +-#endif +- +-/* Ignore bits in mask that come before start of string. */ +-#define PROLOG(lab) \ +- movq %rdi, %rcx; \ +- xorq %rax, %rcx; \ +- STRNLEN_PROLOG; \ +- sarq %cl, %rdx; \ +- test %rdx, %rdx; \ +- je L(lab); \ +- bsfq %rdx, %rax; \ +- SHIFT_RETURN; \ +- ret +- +-#ifdef AS_STRNLEN +- andq $-16, %rax +- FIND_ZERO +-#else +- /* Test first 16 bytes unaligned. */ +- movdqu (%rax), %xmm4 +- PCMPEQ %xmm0, %xmm4 +- pmovmskb %xmm4, %edx +- test %edx, %edx +- je L(next48_bytes) +- bsf %edx, %eax /* If eax is zeroed 16bit bsf can be used. */ +- SHIFT_RETURN +- ret +- +-L(next48_bytes): +-/* Same as FIND_ZERO except we do not check first 16 bytes. */ +- andq $-16, %rax +- PCMPEQ 16(%rax), %xmm1 +- PCMPEQ 32(%rax), %xmm2 +- PCMPEQ 48(%rax), %xmm3 +- pmovmskb %xmm1, %edx +- pmovmskb %xmm2, %r8d +- pmovmskb %xmm3, %ecx +- salq $16, %rdx +- salq $16, %rcx +- orq %r8, %rcx +- salq $32, %rcx +- orq %rcx, %rdx +-#endif +- +- /* When no zero byte is found xmm1-3 are zero so we do not have to +- zero them. */ +- PROLOG(loop) +- +- .p2align 4 +-L(cross_page): +- andq $-64, %rax +- FIND_ZERO +- PROLOG(loop_init) +- +-#ifdef AS_STRNLEN +-/* We must do this check to correctly handle strnlen (s, -1). 
*/ +-L(strnlen_ret): +- bts %rsi, %rdx +- sarq %cl, %rdx +- test %rdx, %rdx +- je L(loop_init) +- bsfq %rdx, %rax +- SHIFT_RETURN +- ret +-#endif +- .p2align 4 +-L(loop_init): +- pxor %xmm1, %xmm1 +- pxor %xmm2, %xmm2 +- pxor %xmm3, %xmm3 +-#ifdef AS_STRNLEN +- .p2align 4 +-L(loop): +- +- addq $64, %rax +- cmpq %rax, %r10 +- je L(exit_end) +- +- movdqa (%rax), %xmm0 +- PMINU 16(%rax), %xmm0 +- PMINU 32(%rax), %xmm0 +- PMINU 48(%rax), %xmm0 +- PCMPEQ %xmm3, %xmm0 +- pmovmskb %xmm0, %edx +- testl %edx, %edx +- jne L(exit) +- jmp L(loop) +- +- .p2align 4 +-L(exit_end): +- cmp %rax, %r11 +- je L(first) /* Do not read when end is at page boundary. */ +- pxor %xmm0, %xmm0 +- FIND_ZERO +- +-L(first): +- bts %r11, %rdx +- bsfq %rdx, %rdx +- addq %rdx, %rax +- subq %rdi, %rax +- SHIFT_RETURN +- ret +- +- .p2align 4 +-L(exit): +- pxor %xmm0, %xmm0 +- FIND_ZERO +- +- bsfq %rdx, %rdx +- addq %rdx, %rax +- subq %rdi, %rax +- SHIFT_RETURN +- ret +- +-#else +- +- /* Main loop. Unrolled twice to improve L2 cache performance on core2. 
*/ +- .p2align 4 +-L(loop): +- +- movdqa 64(%rax), %xmm0 +- PMINU 80(%rax), %xmm0 +- PMINU 96(%rax), %xmm0 +- PMINU 112(%rax), %xmm0 +- PCMPEQ %xmm3, %xmm0 +- pmovmskb %xmm0, %edx +- testl %edx, %edx +- jne L(exit64) +- +- subq $-128, %rax +- +- movdqa (%rax), %xmm0 +- PMINU 16(%rax), %xmm0 +- PMINU 32(%rax), %xmm0 +- PMINU 48(%rax), %xmm0 +- PCMPEQ %xmm3, %xmm0 +- pmovmskb %xmm0, %edx +- testl %edx, %edx +- jne L(exit0) +- jmp L(loop) +- +- .p2align 4 +-L(exit64): +- addq $64, %rax +-L(exit0): +- pxor %xmm0, %xmm0 +- FIND_ZERO +- +- bsfq %rdx, %rdx +- addq %rdx, %rax +- subq %rdi, %rax +- SHIFT_RETURN +- ret +- +-#endif +- +-END(strlen) + libc_hidden_builtin_def (strlen) +diff --git a/sysdeps/x86_64/sysdep.h b/sysdeps/x86_64/sysdep.h +index 0b73674f68..c8ad778fee 100644 +--- a/sysdeps/x86_64/sysdep.h ++++ b/sysdeps/x86_64/sysdep.h +@@ -95,6 +95,28 @@ lose: \ + #define R14_LP r14 + #define R15_LP r15 + ++/* Zero upper vector registers and return with xtest. NB: Use VZEROALL ++ to avoid RTM abort triggered by VZEROUPPER inside transactionally. */ ++#define ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST \ ++ xtest; \ ++ jz 1f; \ ++ vzeroall; \ ++ ret; \ ++1: \ ++ vzeroupper; \ ++ ret ++ ++/* Zero upper vector registers and return. */ ++#ifndef ZERO_UPPER_VEC_REGISTERS_RETURN ++# define ZERO_UPPER_VEC_REGISTERS_RETURN \ ++ VZEROUPPER; \ ++ ret ++#endif ++ ++#ifndef VZEROUPPER_RETURN ++# define VZEROUPPER_RETURN VZEROUPPER; ret ++#endif ++ + #else /* __ASSEMBLER__ */ + + /* Long and pointer size in bytes. */ +diff --git a/sysdeps/x86_64/tst-rsi-strlen.c b/sysdeps/x86_64/tst-rsi-strlen.c +new file mode 100644 +index 0000000000..a80c4f85c2 +--- /dev/null ++++ b/sysdeps/x86_64/tst-rsi-strlen.c +@@ -0,0 +1,81 @@ ++/* Test strlen with 0 in the RSI register. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#ifdef WIDE ++# define TEST_NAME "wcslen" ++#else ++# define TEST_NAME "strlen" ++#endif /* WIDE */ ++ ++#define TEST_MAIN ++#include <string/test-string.h> ++ ++#ifdef WIDE ++# include <wchar.h> ++# define STRLEN wcslen ++# define CHAR wchar_t ++#else ++# define STRLEN strlen ++# define CHAR char ++#endif /* WIDE */ ++ ++IMPL (STRLEN, 1) ++ ++typedef size_t (*proto_t) (const CHAR *); ++ ++typedef struct ++{ ++ void (*fn) (void); ++} parameter_t; ++ ++size_t ++__attribute__ ((weak, noinline, noclone)) ++do_strlen (parameter_t *a, int zero, const CHAR *str) ++{ ++ return CALL (a, str); ++} ++ ++static int ++test_main (void) ++{ ++ test_init (); ++ ++ size_t size = page_size / sizeof (CHAR) - 1; ++ CHAR *buf = (CHAR *) buf2; ++ buf[size] = 0; ++ ++ parameter_t a; ++ ++ int ret = 0; ++ FOR_EACH_IMPL (impl, 0) ++ { ++ a.fn = impl->fn; ++ /* NB: Pass 0 in RSI. */ ++ size_t res = do_strlen (&a, 0, buf); ++ if (res != size) ++ { ++ error (0, 0, "Wrong result in function %s: %zu != %zu", ++ impl->name, res, size); ++ ret = 1; ++ } ++ } ++ ++ return ret ? 
EXIT_FAILURE : EXIT_SUCCESS; ++} ++ ++#include <support/test-driver.c> +diff --git a/sysdeps/x86_64/tst-rsi-wcslen.c b/sysdeps/x86_64/tst-rsi-wcslen.c +new file mode 100644 +index 0000000000..f45a7dfb51 +--- /dev/null ++++ b/sysdeps/x86_64/tst-rsi-wcslen.c +@@ -0,0 +1,20 @@ ++/* Test wcslen with 0 in the RSI register. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#define WIDE 1 ++#include "tst-rsi-strlen.c" +diff --git a/sysvipc/test-sysvsem.c b/sysvipc/test-sysvsem.c +index 01dbff343a..b7284e0b48 100644 +--- a/sysvipc/test-sysvsem.c ++++ b/sysvipc/test-sysvsem.c +@@ -20,6 +20,7 @@ + #include <stdlib.h> + #include <errno.h> + #include <string.h> ++#include <stdbool.h> + #include <sys/types.h> + #include <sys/ipc.h> + #include <sys/sem.h> +diff --git a/version.h b/version.h +index 83cd196798..e6ca7a8857 100644 +--- a/version.h ++++ b/version.h +@@ -1,4 +1,4 @@ + /* This file just defines the current version number of libc. 
*/ + +-#define RELEASE "release" ++#define RELEASE "stable" + #define VERSION "2.32" +diff -pruN glibc-2.32.orig/sysdeps/unix/sysv/linux/x86_64/64/configure glibc-2.32/sysdeps/unix/sysv/linux/x86_64/64/configure +--- glibc-2.32.orig/sysdeps/unix/sysv/linux/x86_64/64/configure 2021-09-18 21:02:32.741186019 +1000 ++++ glibc-2.32/sysdeps/unix/sysv/linux/x86_64/64/configure 2021-09-18 21:03:05.314302356 +1000 +@@ -4,10 +4,10 @@ + test -n "$libc_cv_slibdir" || + case "$prefix" in + /usr | /usr/) +- libc_cv_slibdir='/lib64' +- libc_cv_rtlddir='/lib64' ++ libc_cv_slibdir='/lib' ++ libc_cv_rtlddir='/lib' + if test "$libdir" = '${exec_prefix}/lib'; then +- libdir='${exec_prefix}/lib64'; ++ libdir='${exec_prefix}/lib'; + # Locale data can be shared between 32-bit and 64-bit libraries. + libc_cv_complocaledir='${exec_prefix}/lib/locale' + fi +diff -pruN glibc-2.32.orig/sysdeps/unix/sysv/linux/x86_64/ldconfig.h glibc-2.32/sysdeps/unix/sysv/linux/x86_64/ldconfig.h +--- glibc-2.32.orig/sysdeps/unix/sysv/linux/x86_64/ldconfig.h 2021-09-18 21:02:32.742186053 +1000 ++++ glibc-2.32/sysdeps/unix/sysv/linux/x86_64/ldconfig.h 2021-09-18 21:03:05.314302356 +1000 +@@ -18,9 +18,9 @@ + #include <sysdeps/generic/ldconfig.h> + + #define SYSDEP_KNOWN_INTERPRETER_NAMES \ +- { "/lib/ld-linux.so.2", FLAG_ELF_LIBC6 }, \ ++ { "/lib32/ld-linux.so.2", FLAG_ELF_LIBC6 }, \ + { "/libx32/ld-linux-x32.so.2", FLAG_ELF_LIBC6 }, \ +- { "/lib64/ld-linux-x86-64.so.2", FLAG_ELF_LIBC6 }, ++ { "/lib/ld-linux-x86-64.so.2", FLAG_ELF_LIBC6 }, + #define SYSDEP_KNOWN_LIBRARY_NAMES \ + { "libc.so.6", FLAG_ELF_LIBC6 }, \ + { "libm.so.6", FLAG_ELF_LIBC6 },