diff --git a/19614-locale-Handle-loading-a-missing-locale-twice-Bug-14.patch b/19614-locale-Handle-loading-a-missing-locale-twice-Bug-14.patch new file mode 100644 index 0000000000000000000000000000000000000000..fcb335f4443f062cea8cfac2614c36d01a13f234 --- /dev/null +++ b/19614-locale-Handle-loading-a-missing-locale-twice-Bug-14.patch @@ -0,0 +1,274 @@ +From 684fbab755e727a8c15f8b621648d66694cd1f53 Mon Sep 17 00:00:00 2001 +From: Carlos O'Donell +Date: Mon, 22 Apr 2024 08:16:09 -0400 +Subject: locale: Handle loading a missing locale twice (Bug 14247) + +Delay setting file->decided until the data has been successfully loaded +by _nl_load_locale(). If the function fails to load the data then we +must return and error and leave decided untouched to allow the caller to +attempt to load the data again at a later time. We should not set +decided to 1 early in the function since doing so may prevent attempting +to load it again. We want to try loading it again because that allows an +open to fail and set errno correctly. + +On the other side of this problem is that if we are called again with +the same inputs we will fetch the cached version of the object and carry +out no open syscalls and that fails to set errno so we must set errno to +ENOENT in that case. There is a second code path that has to be handled +where the name of the locale matches but the codeset doesn't match. + +These changes ensure that errno is correctly set on failure in all the +return paths in _nl_find_locale(). + +Adds tst-locale-loadlocale to cover the bug. + +No regressions on x86_64. + +Co-authored-by: Jeff Law +Reviewed-by: Adhemerval Zanella +--- + gen-locales.mk | 13 +++++- + locale/findlocale.c | 19 +++++++-- + locale/loadlocale.c | 2 +- + localedata/Makefile | 4 ++ + localedata/gen-locale.sh | 24 ++++++++--- + localedata/tst-locale-loadlocale.c | 67 ++++++++++++++++++++++++++++++ + 6 files changed, 119 insertions(+), 10 deletions(-) + create mode 100644 localedata/tst-locale-loadlocale.c + +diff --git a/gen-locales.mk b/gen-locales.mk +index 9c523d2a..b005a728 100644 +--- a/gen-locales.mk ++++ b/gen-locales.mk +@@ -1,8 +1,19 @@ + # defines target $(gen-locales) that generates the locales given in $(LOCALES) + + LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^@ ]*\(@[^ ]*\)\?/\1\2/g') ++# The CHARMAPS dependency handling must be able to process: ++# 1. No character map e.g. eo, en_US ++# 2. Character maps e.g. en_US.UTF-8 ++# 3. Character maps with modifier e.g. tt_RU.UTF-8@iqtelif ++# This complicates the processing slightly so we do it in multiple edits, ++# the first captures the character map with the anchoring period while ++# the rest of the edits remove the period to get a valid file or adjust ++# the name to match the true name. + CHARMAPS := $(shell echo "$(LOCALES)" | \ +- sed -e 's/[^ .]*[.]\([^@ ]*\)\(@[^@ ]*\)*/\1/g' -e s/SJIS/SHIFT_JIS/g) ++ sed -e 's/\([^ .]*\)\([^@ ]*\)\(@[^@ ]*\)*/\2/g' \ ++ -e 's/^\./ /g' \ ++ -e 's/ \./ /g' \ ++ -e s/SJIS/SHIFT_JIS/g) + CTYPE_FILES = $(addsuffix /LC_CTYPE,$(LOCALES)) + gen-locales := $(addprefix $(common-objpfx)localedata/,$(CTYPE_FILES)) + +diff --git a/locale/findlocale.c b/locale/findlocale.c +index 1b703434..df74fbae 100644 +--- a/locale/findlocale.c ++++ b/locale/findlocale.c +@@ -244,7 +244,14 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len, + locale_file = locale_file->successor[cnt]; + + if (locale_file == NULL) +- return NULL; ++ { ++ /* If this is the second time we tried to load a failed ++ locale then the locale_file value comes from the cache ++ and we will not carry out any actual filesystem ++ operations so we must set ENOENT here. */ ++ __set_errno (ENOENT); ++ return NULL; ++ } + } + + /* The LC_CTYPE category allows to check whether a locale is really +@@ -291,8 +298,14 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len, + if (__gconv_compare_alias (upstr (ccodeset, ccodeset), + upstr (clocale_codeset, + clocale_codeset)) != 0) +- /* The codesets are not identical, don't use the locale. */ +- return NULL; ++ { ++ /* The codesets are not identical, don't use the locale. ++ If this is the second time we tried to load a locale ++ whose codeset doesn't match then the result came from ++ the cache and must set ENOENT here. */ ++ __set_errno (ENOENT); ++ return NULL; ++ } + } + + /* Determine the locale name for which loading succeeded. This +diff --git a/locale/loadlocale.c b/locale/loadlocale.c +index 671e71cf..1b4d0b05 100644 +--- a/locale/loadlocale.c ++++ b/locale/loadlocale.c +@@ -237,7 +237,6 @@ _nl_load_locale (struct loaded_l10nfile *file, int category) + int save_err; + int alloc = ld_mapped; + +- file->decided = 1; + file->data = NULL; + + fd = __open_nocancel (file->filename, O_RDONLY | O_CLOEXEC); +@@ -345,6 +344,7 @@ _nl_load_locale (struct loaded_l10nfile *file, int category) + newdata->alloc = alloc; + + file->data = newdata; ++ file->decided = 1; + } + + void +diff --git a/localedata/Makefile b/localedata/Makefile +index 3453b8c9..888634b3 100644 +--- a/localedata/Makefile ++++ b/localedata/Makefile +@@ -166,6 +166,7 @@ tests = \ + tst-digits \ + tst-iconv-math-trans \ + tst-leaks \ ++ tst-locale-loadlocale \ + tst-mbswcs1 \ + tst-mbswcs2 \ + tst-mbswcs3 \ +@@ -243,6 +244,7 @@ LOCALES := \ + dsb_DE.UTF-8 \ + dz_BT.UTF-8 \ + en_GB.UTF-8 \ ++ en_US \ + en_US.ANSI_X3.4-1968 \ + en_US.ISO-8859-1\ + en_US.UTF-8 \ +@@ -321,6 +323,8 @@ LOCALES := \ + include ../gen-locales.mk + + $(objpfx)tst-iconv-math-trans.out: $(gen-locales) ++# tst-locale-loadlocale: Needs an en_US-named locale for the test. ++$(objpfx)tst-locale-loadlocale.out: $(gen-locales) + endif + + include ../Rules +diff --git a/localedata/gen-locale.sh b/localedata/gen-locale.sh +index 7a9e8556..740b6500 100644 +--- a/localedata/gen-locale.sh ++++ b/localedata/gen-locale.sh +@@ -48,9 +48,9 @@ generate_locale () + } + + locfile=`echo $locfile|sed 's|.*/\([^/]*/LC_CTYPE\)|\1|'` +-locale=`echo $locfile|sed 's|\([^.]*\)[.].*/LC_CTYPE|\1|'` +-charmap=`echo $locfile|sed 's|[^.]*[.]\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\1|'` +-modifier=`echo $locfile|sed 's|[^.]*[.]\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\2|'` ++locale=`echo $locfile|sed 's|\([^.]*\).*/LC_CTYPE|\1|'` ++charmap=`echo $locfile|sed -e 's|[^.]*\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\1|' -e 's|^\.||g'` ++modifier=`echo $locfile|sed 's|[^.]*\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\2|'` + + echo "Generating locale $locale.$charmap: this might take a while..." + +@@ -63,10 +63,16 @@ echo "Generating locale $locale.$charmap: this might take a while..." + # you to develop in-progress locales. + flags="" + ++charmap_real="$charmap" ++ ++# If no character map is specified then we fall back to UTF-8. ++if [ -z "$charmap" ]; then ++ charmap_real="UTF-8" ++fi ++ + # For SJIS the charmap is SHIFT_JIS. We just want the locale to have + # a slightly nicer name instead of using "*.SHIFT_SJIS", but that + # means we need a mapping here. +-charmap_real="$charmap" + if [ "$charmap" = "SJIS" ]; then + charmap_real="SHIFT_JIS" + fi +@@ -80,4 +86,12 @@ if [ "$charmap_real" = 'SHIFT_JIS' ] \ + flags="$flags --no-warnings=ascii" + fi + +-generate_locale $charmap_real $locale$modifier $locale.$charmap$modifier "$flags" ++# If the character map is not specified then we output a locale ++# with the just the name of the locale e.g. eo, en_US. This is ++# used for test locales that act as fallbacks. ++output_file="$locale.$charmap$modifier" ++if [ -z "$charmap" ]; then ++ output_file="$locale" ++fi ++ ++generate_locale $charmap_real $locale$modifier $output_file "$flags" +diff --git a/localedata/tst-locale-loadlocale.c b/localedata/tst-locale-loadlocale.c +new file mode 100644 +index 00000000..64796e97 +--- /dev/null ++++ b/localedata/tst-locale-loadlocale.c +@@ -0,0 +1,67 @@ ++/* Test for locale loading error handling (Bug 14247) ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ locale_t loc = NULL; ++ ++ /* We must load an en_US-based locale for the final test to fail. ++ The locale loading code will find the en_US locale already loaded ++ but the codesets won't match. */ ++ xsetlocale (LC_ALL, "en_US.UTF-8"); ++ ++ /* Call newlocale with an invalid locale. We expect the test system ++ does not have "invalidlocale" locale. */ ++ errno = 0; ++ loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "invalidlocale.utf8", 0); ++ TEST_VERIFY (loc == NULL && errno != 0); ++ ++ /* Call newlocale again with the same name. This triggers bug 14247 where ++ the second call fails to set errno correctly. */ ++ errno = 0; ++ loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "invalidlocale.utf8", 0); ++ TEST_VERIFY (loc == NULL && errno != 0); ++ ++ /* Next we attempt to load a locale that exists but whose codeset ++ does not match the codeset of the locale on the system. ++ This is more difficult to test but we rely on the fact that locale ++ processing will normalize the locale name and attempt to open ++ "en_US" with no codeset as a fallback and this will allow us to ++ compare a loaded "en_US" locale with a UTF-8 codeset to the ++ ficiticious "en_US.utf99" and get a codeset match failure. */ ++ errno = 0; ++ loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "en_US.utf99", 0); ++ TEST_VERIFY (loc == NULL && errno != 0); ++ ++ /* Call newlocale again with the same name. This triggers bug 14247 where ++ the second call fails to set errno correctly. */ ++ errno = 0; ++ loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "en_US.utf99", 0); ++ TEST_VERIFY (loc == NULL && errno != 0); ++ ++ return 0; ++} ++ ++#include +-- +2.27.0 + diff --git a/glibc.spec b/glibc.spec index d9570541604eed0334b4a52df317e5ab0370bb21..b0e8539b3da987f066675839bd4258ecf8e43860 100644 --- a/glibc.spec +++ b/glibc.spec @@ -67,7 +67,7 @@ ############################################################################## Name: glibc Version: 2.38 -Release: 36 +Release: 37 Summary: The GNU libc libraries License: %{all_license} URL: http://www.gnu.org/software/glibc/ @@ -214,6 +214,7 @@ Patch124: 0011-Update-syscall-lists-for-Linux-6.5.patch Patch125: 0012-resolv-Fix-tst-resolv-short-response-for-older-GCC-b.patch Patch126: Fix-name-space-violation-in-fortify-wrappers-bug-320.patch Patch127: x86-Fix-bug-in-strchrnul-evex512-BZ-32078.patch +Patch128: 19614-locale-Handle-loading-a-missing-locale-twice-Bug-14.patch #openEuler patch list Patch9000: turn-default-value-of-x86_rep_stosb_threshold_form_2K_to_1M.patch @@ -1446,6 +1447,9 @@ fi %endif %changelog +* Wed Sep 4 2024 Zhaoshuang - 2.38-37 +- Fix issue that loading a missing locale twice [BZ #14247] + * Tue Aug 27 2024 YunQiang Su - 2.38-36 - Fix libnsl.so and libnss_.so filelist process (PR 877)