diff --git a/Rewrite-iconv-option-parsing-BZ-19519.patch b/Rewrite-iconv-option-parsing-BZ-19519.patch new file mode 100644 index 0000000000000000000000000000000000000000..0a88e90342c2a2406ac2478fdc87c0d2527a5742 --- /dev/null +++ b/Rewrite-iconv-option-parsing-BZ-19519.patch @@ -0,0 +1,1381 @@ +From 91927b7c76437db860cd86a7714476b56bb39d07 Mon Sep 17 00:00:00 2001 +From: Arjun Shankar +Date: Tue, 7 Jul 2020 20:31:48 +0200 +Subject: [PATCH] Rewrite iconv option parsing [BZ #19519] + +This commit replaces string manipulation during `iconv_open' and iconv_prog +option parsing with a structured, flag based conversion specification. In +doing so, it alters the internal `__gconv_open' interface and accordingly +adjusts its uses. + +This change fixes several hangs in the iconv program and therefore includes +a new test to exercise iconv_prog options that originally led to these hangs. +It also includes a new regression test for option handling in the iconv +function. + +Reviewed-by: Florian Weimer +Reviewed-by: Siddhesh Poyarekar +Reviewed-by: Carlos O'Donell +--- + iconv/Makefile | 17 +- + iconv/Versions | 1 + + iconv/gconv_charset.c | 218 +++++++++++++++++++++++++ + iconv/gconv_charset.h | 61 ++++++- + iconv/gconv_int.h | 19 ++- + iconv/gconv_open.c | 64 ++------ + iconv/iconv_open.c | 46 +----- + iconv/iconv_prog.c | 63 +++----- + iconv/tst-iconv-opt.c | 347 ++++++++++++++++++++++++++++++++++++++++ + iconv/tst-iconv_prog.sh | 280 ++++++++++++++++++++++++++++++++ + intl/dcigettext.c | 15 +- + 11 files changed, 988 insertions(+), 143 deletions(-) + create mode 100644 iconv/gconv_charset.c + create mode 100644 iconv/tst-iconv-opt.c + create mode 100644 iconv/tst-iconv_prog.sh + +diff --git a/iconv/Makefile b/iconv/Makefile +index b8fe8c47df..30bf996d3a 100644 +--- a/iconv/Makefile ++++ b/iconv/Makefile +@@ -26,7 +26,7 @@ headers = iconv.h gconv.h + routines = iconv_open iconv iconv_close \ + gconv_open gconv gconv_close gconv_db gconv_conf \ + gconv_builtin gconv_simple gconv_trans gconv_cache +-routines += gconv_dl ++routines += gconv_dl gconv_charset + + vpath %.c ../locale/programs ../intl + +@@ -44,7 +44,7 @@ CFLAGS-linereader.c += -DNO_TRANSLITERATION + CFLAGS-simple-hash.c += -I../locale + + tests = tst-iconv1 tst-iconv2 tst-iconv3 tst-iconv4 tst-iconv5 tst-iconv6 \ +- tst-iconv7 tst-iconv8 ++ tst-iconv7 tst-iconv8 tst-iconv-opt + + others = iconv_prog iconvconfig + others-pie += iconv_prog iconvconfig +@@ -61,6 +61,7 @@ include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left)) + + ifeq ($(run-built-tests),yes) + xtests-special += $(objpfx)test-iconvconfig.out ++tests-special += $(objpfx)tst-iconv_prog.out + endif + + # Make a copy of the file because gconv module names are constructed +@@ -81,6 +82,13 @@ endif + + include ../Rules + ++ifeq ($(run-built-tests),yes) ++LOCALES := en_US.UTF-8 ++include ../gen-locales.mk ++ ++$(objpfx)tst-iconv-opt.out: $(gen-locales) ++endif ++ + $(inst_bindir)/iconv: $(objpfx)iconv_prog $(+force) + $(do-install-program) + +@@ -95,3 +103,8 @@ $(objpfx)test-iconvconfig.out: /dev/null $(objpfx)iconvconfig + cmp $$tmp $(inst_gconvdir)/gconv-modules.cache; \ + rm -f $$tmp) > $@; \ + $(evaluate-test) ++ ++$(objpfx)tst-iconv_prog.out: tst-iconv_prog.sh $(objpfx)iconv_prog ++ $(BASH) $< $(common-objdir) '$(test-wrapper-env)' \ ++ '$(run-program-env)' > $@; \ ++ $(evaluate-test) +diff --git a/iconv/Versions b/iconv/Versions +index 60ab10a277..8a5f4cf780 100644 +--- a/iconv/Versions ++++ b/iconv/Versions +@@ -6,6 +6,7 @@ libc { + GLIBC_PRIVATE { + # functions shared with iconv program + __gconv_get_alias_db; __gconv_get_cache; __gconv_get_modules_db; ++ __gconv_open; __gconv_create_spec; + + # function used by the gconv modules + __gconv_transliterate; +diff --git a/iconv/gconv_charset.c b/iconv/gconv_charset.c +new file mode 100644 +index 0000000000..6ccd0773cc +--- /dev/null ++++ b/iconv/gconv_charset.c +@@ -0,0 +1,218 @@ ++/* Charset name normalization. ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "gconv_int.h" ++#include "gconv_charset.h" ++ ++ ++/* This function returns a pointer to the last suffix in a conversion code ++ string. Valid suffixes matched by this function are of the form: '/' or ',' ++ followed by arbitrary text that doesn't contain '/' or ','. It does not ++ edit the string in any way. The caller is expected to parse the suffix and ++ remove it (by e.g. truncating the string) before the next call. */ ++static char * ++find_suffix (char *s) ++{ ++ /* The conversion code is in the form of a triplet, separated by '/' chars. ++ The third component of the triplet contains suffixes. If we don't have two ++ slashes, we don't have a suffix. */ ++ ++ int slash_count = 0; ++ char *suffix_term = NULL; ++ ++ for (int i = 0; s[i] != '\0'; i++) ++ switch (s[i]) ++ { ++ case '/': ++ slash_count++; ++ /* Fallthrough */ ++ case ',': ++ suffix_term = &s[i]; ++ } ++ ++ if (slash_count >= 2) ++ return suffix_term; ++ ++ return NULL; ++} ++ ++ ++struct gconv_parsed_code ++{ ++ char *code; ++ bool translit; ++ bool ignore; ++}; ++ ++ ++/* This function parses an iconv_open encoding PC.CODE, strips any suffixes ++ (such as TRANSLIT or IGNORE) from it and sets corresponding flags in it. */ ++static void ++gconv_parse_code (struct gconv_parsed_code *pc) ++{ ++ pc->translit = false; ++ pc->ignore = false; ++ ++ while (1) ++ { ++ /* First drop any trailing whitespaces and separators. */ ++ size_t len = strlen (pc->code); ++ while ((len > 0) ++ && (isspace (pc->code[len - 1]) ++ || pc->code[len - 1] == ',' ++ || pc->code[len - 1] == '/')) ++ len--; ++ ++ pc->code[len] = '\0'; ++ ++ if (len == 0) ++ return; ++ ++ char * suffix = find_suffix (pc->code); ++ if (suffix == NULL) ++ { ++ /* At this point, we have processed and removed all suffixes from the ++ code and what remains of the code is suffix free. */ ++ return; ++ } ++ else ++ { ++ /* A suffix is processed from the end of the code array going ++ backwards, one suffix at a time. The suffix is an index into the ++ code character array and points to: one past the end of the code ++ and any unprocessed suffixes, and to the beginning of the suffix ++ currently being processed during this iteration. We must process ++ this suffix and then drop it from the code by terminating the ++ preceding text with NULL. ++ ++ We want to allow and recognize suffixes such as: ++ ++ "/TRANSLIT" i.e. single suffix ++ "//TRANSLIT" i.e. single suffix and multiple separators ++ "//TRANSLIT/IGNORE" i.e. suffixes separated by "/" ++ "/TRANSLIT//IGNORE" i.e. suffixes separated by "//" ++ "//IGNORE,TRANSLIT" i.e. suffixes separated by "," ++ "//IGNORE," i.e. trailing "," ++ "//TRANSLIT/" i.e. trailing "/" ++ "//TRANSLIT//" i.e. trailing "//" ++ "/" i.e. empty suffix. ++ ++ Unknown suffixes are silently discarded and ignored. */ ++ ++ if ((__strcasecmp_l (suffix, ++ GCONV_TRIPLE_SEPARATOR ++ GCONV_TRANSLIT_SUFFIX, ++ _nl_C_locobj_ptr) == 0) ++ || (__strcasecmp_l (suffix, ++ GCONV_SUFFIX_SEPARATOR ++ GCONV_TRANSLIT_SUFFIX, ++ _nl_C_locobj_ptr) == 0)) ++ pc->translit = true; ++ ++ if ((__strcasecmp_l (suffix, ++ GCONV_TRIPLE_SEPARATOR ++ GCONV_IGNORE_ERRORS_SUFFIX, ++ _nl_C_locobj_ptr) == 0) ++ || (__strcasecmp_l (suffix, ++ GCONV_SUFFIX_SEPARATOR ++ GCONV_IGNORE_ERRORS_SUFFIX, ++ _nl_C_locobj_ptr) == 0)) ++ pc->ignore = true; ++ ++ /* We just processed this suffix. We can now drop it from the ++ code string by truncating it at the suffix's position. */ ++ suffix[0] = '\0'; ++ } ++ } ++} ++ ++ ++/* This function accepts the charset names of the source and destination of the ++ conversion and populates *conv_spec with an equivalent conversion ++ specification that may later be used by __gconv_open. The charset names ++ might contain options in the form of suffixes that alter the conversion, ++ e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring ++ and truncating any suffix options in fromcode, and processing and truncating ++ any suffix options in tocode. Supported suffix options ("TRANSLIT" or ++ "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec ++ to be set to true. Unrecognized suffix options are silently discarded. If ++ the function succeeds, it returns conv_spec back to the caller. It returns ++ NULL upon failure. conv_spec must be allocated and freed by the caller. */ ++struct gconv_spec * ++__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode, ++ const char *tocode) ++{ ++ struct gconv_parsed_code pfc, ptc; ++ struct gconv_spec *ret = NULL; ++ ++ pfc.code = __strdup (fromcode); ++ ptc.code = __strdup (tocode); ++ ++ if ((pfc.code == NULL) ++ || (ptc.code == NULL)) ++ goto out; ++ ++ gconv_parse_code (&pfc); ++ gconv_parse_code (&ptc); ++ ++ /* We ignore suffixes in the fromcode because that is how the current ++ implementation has always handled them. Only suffixes in the tocode are ++ processed and handled. The reality is that invalid input in the input ++ character set should only be ignored if the fromcode specifies IGNORE. ++ The current implementation ignores invalid intput in the input character ++ set if the tocode contains IGNORE. We preserve this behavior for ++ backwards compatibility. In the future we may split the handling of ++ IGNORE to allow a finer grained specification of ignorning invalid input ++ and/or ignoring invalid output. */ ++ conv_spec->translit = ptc.translit; ++ conv_spec->ignore = ptc.ignore; ++ ++ /* 3 extra bytes because 1 extra for '\0', and 2 extra so strip might ++ be able to add one or two trailing '/' characters if necessary. */ ++ conv_spec->fromcode = malloc (strlen (fromcode) + 3); ++ if (conv_spec->fromcode == NULL) ++ goto out; ++ ++ conv_spec->tocode = malloc (strlen (tocode) + 3); ++ if (conv_spec->tocode == NULL) ++ { ++ free (conv_spec->fromcode); ++ conv_spec->fromcode = NULL; ++ goto out; ++ } ++ ++ /* Strip unrecognized characters and ensure that the code has two '/' ++ characters as per conversion code triplet specification. */ ++ strip (conv_spec->fromcode, pfc.code); ++ strip (conv_spec->tocode, ptc.code); ++ ret = conv_spec; ++ ++out: ++ free (pfc.code); ++ free (ptc.code); ++ ++ return ret; ++} ++libc_hidden_def (__gconv_create_spec) +diff --git a/iconv/gconv_charset.h b/iconv/gconv_charset.h +index 348acc089b..b39b09aea1 100644 +--- a/iconv/gconv_charset.h ++++ b/iconv/gconv_charset.h +@@ -19,9 +19,68 @@ + + #include + #include ++#include ++#include ++#include ++#include ++#include "gconv_int.h" + + +-static void ++/* An iconv encoding is in the form of a triplet, with parts separated by ++ a '/' character. The first part is the standard name, the second part is ++ the character set, and the third part is the error handler. If the first ++ part is sufficient to identify both the standard and the character set ++ then the second part can be empty e.g. UTF-8//. If the first part is not ++ sufficient to identify both the standard and the character set then the ++ second part is required e.g. ISO-10646/UTF8/. If neither the first or ++ second parts are provided e.g. //, then the current locale is used. ++ The actual values used in the first and second parts are not entirely ++ relevant to the implementation. The values themselves are used in a hash ++ table to lookup modules and so the naming convention of the first two parts ++ is somewhat arbitrary and only helps locate the entries in the cache. ++ The third part is the error handler and is comprised of a ',' or '/' ++ separated list of suffixes. Currently, we support "TRANSLIT" for ++ transliteration and "IGNORE" for ignoring conversion errors due to ++ unrecognized input characters. */ ++#define GCONV_TRIPLE_SEPARATOR "/" ++#define GCONV_SUFFIX_SEPARATOR "," ++#define GCONV_TRANSLIT_SUFFIX "TRANSLIT" ++#define GCONV_IGNORE_ERRORS_SUFFIX "IGNORE" ++ ++ ++/* This function accepts the charset names of the source and destination of the ++ conversion and populates *conv_spec with an equivalent conversion ++ specification that may later be used by __gconv_open. The charset names ++ might contain options in the form of suffixes that alter the conversion, ++ e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring ++ and truncating any suffix options in fromcode, and processing and truncating ++ any suffix options in tocode. Supported suffix options ("TRANSLIT" or ++ "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec ++ to be set to true. Unrecognized suffix options are silently discarded. If ++ the function succeeds, it returns conv_spec back to the caller. It returns ++ NULL upon failure. */ ++struct gconv_spec * ++__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode, ++ const char *tocode); ++libc_hidden_proto (__gconv_create_spec) ++ ++ ++/* This function frees all heap memory allocated by __gconv_create_spec. */ ++static void __attribute__ ((unused)) ++gconv_destroy_spec (struct gconv_spec *conv_spec) ++{ ++ free (conv_spec->fromcode); ++ free (conv_spec->tocode); ++ return; ++} ++ ++ ++/* This function copies in-order, characters from the source 's' that are ++ either alpha-numeric or one in one of these: "_-.,:/" - into the destination ++ 'wp' while dropping all other characters. In the process, it converts all ++ alphabetical characters to upper case. It then appends up to two '/' ++ characters so that the total number of '/'es in the destination is 2. */ ++static inline void __attribute__ ((unused, always_inline)) + strip (char *wp, const char *s) + { + int slash_count = 0; +diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h +index fbaf12cee2..e86938dae7 100644 +--- a/iconv/gconv_int.h ++++ b/iconv/gconv_int.h +@@ -75,6 +75,15 @@ struct gconv_module + }; + + ++/* The specification of the conversion that needs to be performed. */ ++struct gconv_spec ++{ ++ char *fromcode; ++ char *tocode; ++ bool translit; ++ bool ignore; ++}; ++ + /* Flags for `gconv_open'. */ + enum + { +@@ -136,10 +145,12 @@ __libc_lock_define (extern, __gconv_lock attribute_hidden) + }) + + +-/* Return in *HANDLE decriptor for transformation from FROMSET to TOSET. */ +-extern int __gconv_open (const char *toset, const char *fromset, +- __gconv_t *handle, int flags) +- attribute_hidden; ++/* Return in *HANDLE, a decriptor for the transformation. The function expects ++ the specification of the transformation in the structure pointed to by ++ CONV_SPEC. It only reads *CONV_SPEC and does not take ownership of it. */ ++extern int __gconv_open (struct gconv_spec *conv_spec, ++ __gconv_t *handle, int flags); ++libc_hidden_proto (__gconv_open) + + /* Free resources associated with transformation descriptor CD. */ + extern int __gconv_close (__gconv_t cd) +diff --git a/iconv/gconv_open.c b/iconv/gconv_open.c +index b39626d252..2878620957 100644 +--- a/iconv/gconv_open.c ++++ b/iconv/gconv_open.c +@@ -31,7 +31,7 @@ + + + int +-__gconv_open (const char *toset, const char *fromset, __gconv_t *handle, ++__gconv_open (struct gconv_spec *conv_spec, __gconv_t *handle, + int flags) + { + struct __gconv_step *steps; +@@ -40,77 +40,38 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle, + size_t cnt = 0; + int res; + int conv_flags = 0; +- const char *errhand; +- const char *ignore; + bool translit = false; ++ char *tocode, *fromcode; + + /* Find out whether any error handling method is specified. */ +- errhand = strchr (toset, '/'); +- if (errhand != NULL) +- errhand = strchr (errhand + 1, '/'); +- if (__glibc_likely (errhand != NULL)) +- { +- if (*++errhand == '\0') +- errhand = NULL; +- else +- { +- /* Make copy without the error handling description. */ +- char *newtoset = (char *) alloca (errhand - toset + 1); +- char *tok; +- char *ptr = NULL /* Work around a bogus warning */; +- +- newtoset[errhand - toset] = '\0'; +- toset = memcpy (newtoset, toset, errhand - toset); ++ translit = conv_spec->translit; + +- /* Find the appropriate transliteration handlers. */ +- tok = strdupa (errhand); ++ if (conv_spec->ignore) ++ conv_flags |= __GCONV_IGNORE_ERRORS; + +- tok = __strtok_r (tok, ",", &ptr); +- while (tok != NULL) +- { +- if (__strcasecmp_l (tok, "TRANSLIT", _nl_C_locobj_ptr) == 0) +- translit = true; +- else if (__strcasecmp_l (tok, "IGNORE", _nl_C_locobj_ptr) == 0) +- /* Set the flag to ignore all errors. */ +- conv_flags |= __GCONV_IGNORE_ERRORS; +- +- tok = __strtok_r (NULL, ",", &ptr); +- } +- } +- } +- +- /* For the source character set we ignore the error handler specification. +- XXX Is this really always the best? */ +- ignore = strchr (fromset, '/'); +- if (ignore != NULL && (ignore = strchr (ignore + 1, '/')) != NULL +- && *++ignore != '\0') +- { +- char *newfromset = (char *) alloca (ignore - fromset + 1); +- +- newfromset[ignore - fromset] = '\0'; +- fromset = memcpy (newfromset, fromset, ignore - fromset); +- } ++ tocode = conv_spec->tocode; ++ fromcode = conv_spec->fromcode; + + /* If the string is empty define this to mean the charset of the + currently selected locale. */ +- if (strcmp (toset, "//") == 0) ++ if (strcmp (tocode, "//") == 0) + { + const char *codeset = _NL_CURRENT (LC_CTYPE, CODESET); + size_t len = strlen (codeset); + char *dest; +- toset = dest = (char *) alloca (len + 3); ++ tocode = dest = (char *) alloca (len + 3); + memcpy (__mempcpy (dest, codeset, len), "//", 3); + } +- if (strcmp (fromset, "//") == 0) ++ if (strcmp (fromcode, "//") == 0) + { + const char *codeset = _NL_CURRENT (LC_CTYPE, CODESET); + size_t len = strlen (codeset); + char *dest; +- fromset = dest = (char *) alloca (len + 3); ++ fromcode = dest = (char *) alloca (len + 3); + memcpy (__mempcpy (dest, codeset, len), "//", 3); + } + +- res = __gconv_find_transform (toset, fromset, &steps, &nsteps, flags); ++ res = __gconv_find_transform (tocode, fromcode, &steps, &nsteps, flags); + if (res == __GCONV_OK) + { + /* Allocate room for handle. */ +@@ -209,3 +170,4 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle, + *handle = result; + return res; + } ++libc_hidden_def (__gconv_open) +diff --git a/iconv/iconv_open.c b/iconv/iconv_open.c +index 687067070a..dd54bc12e0 100644 +--- a/iconv/iconv_open.c ++++ b/iconv/iconv_open.c +@@ -31,49 +31,15 @@ + iconv_t + iconv_open (const char *tocode, const char *fromcode) + { +- /* Normalize the name. We remove all characters beside alpha-numeric, +- '_', '-', '/', '.', and ':'. */ +- size_t tocode_len = strlen (tocode) + 3; +- char *tocode_conv; +- bool tocode_usealloca = __libc_use_alloca (tocode_len); +- if (tocode_usealloca) +- tocode_conv = (char *) alloca (tocode_len); +- else +- { +- tocode_conv = (char *) malloc (tocode_len); +- if (tocode_conv == NULL) +- return (iconv_t) -1; +- } +- strip (tocode_conv, tocode); +- tocode = (tocode_conv[2] == '\0' && tocode[0] != '\0' +- ? upstr (tocode_conv, tocode) : tocode_conv); ++ __gconv_t cd; ++ struct gconv_spec conv_spec; + +- size_t fromcode_len = strlen (fromcode) + 3; +- char *fromcode_conv; +- bool fromcode_usealloca = __libc_use_alloca (fromcode_len); +- if (fromcode_usealloca) +- fromcode_conv = (char *) alloca (fromcode_len); +- else +- { +- fromcode_conv = (char *) malloc (fromcode_len); +- if (fromcode_conv == NULL) +- { +- if (! tocode_usealloca) +- free (tocode_conv); +- return (iconv_t) -1; +- } +- } +- strip (fromcode_conv, fromcode); +- fromcode = (fromcode_conv[2] == '\0' && fromcode[0] != '\0' +- ? upstr (fromcode_conv, fromcode) : fromcode_conv); ++ if (__gconv_create_spec (&conv_spec, fromcode, tocode) == NULL) ++ return (iconv_t) -1; + +- __gconv_t cd; +- int res = __gconv_open (tocode, fromcode, &cd, 0); ++ int res = __gconv_open (&conv_spec, &cd, 0); + +- if (! fromcode_usealloca) +- free (fromcode_conv); +- if (! tocode_usealloca) +- free (tocode_conv); ++ gconv_destroy_spec (&conv_spec); + + if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) + { +diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c +index 9709e4a701..b4334faa57 100644 +--- a/iconv/iconv_prog.c ++++ b/iconv/iconv_prog.c +@@ -39,6 +39,7 @@ + #include + #include "iconv_prog.h" + #include "iconvconfig.h" ++#include "gconv_charset.h" + + /* Get libc version number. */ + #include "../version.h" +@@ -118,8 +119,7 @@ main (int argc, char *argv[]) + { + int status = EXIT_SUCCESS; + int remaining; +- iconv_t cd; +- const char *orig_to_code; ++ __gconv_t cd; + struct charmap_t *from_charmap = NULL; + struct charmap_t *to_charmap = NULL; + +@@ -139,39 +139,6 @@ main (int argc, char *argv[]) + exit (EXIT_SUCCESS); + } + +- /* If we have to ignore errors make sure we use the appropriate name for +- the to-character-set. */ +- orig_to_code = to_code; +- if (omit_invalid) +- { +- const char *errhand = strchrnul (to_code, '/'); +- int nslash = 2; +- char *newp; +- char *cp; +- +- if (*errhand == '/') +- { +- --nslash; +- errhand = strchrnul (errhand + 1, '/'); +- +- if (*errhand == '/') +- { +- --nslash; +- errhand = strchr (errhand, '\0'); +- } +- } +- +- newp = (char *) alloca (errhand - to_code + nslash + 7 + 1); +- cp = mempcpy (newp, to_code, errhand - to_code); +- while (nslash-- > 0) +- *cp++ = '/'; +- if (cp[-1] != '/') +- *cp++ = ','; +- memcpy (cp, "IGNORE", sizeof ("IGNORE")); +- +- to_code = newp; +- } +- + /* POSIX 1003.2b introduces a silly thing: the arguments to -t anf -f + can be file names of charmaps. In this case iconv will have to read + those charmaps and use them to do the conversion. But there are +@@ -184,10 +151,10 @@ main (int argc, char *argv[]) + file. */ + from_charmap = charmap_read (from_code, /*0, 1*/1, 0, 0, 0); + +- if (strchr (orig_to_code, '/') != NULL) ++ if (strchr (to_code, '/') != NULL) + /* The to-name might be a charmap file name. Try reading the + file. */ +- to_charmap = charmap_read (orig_to_code, /*0, 1,*/1, 0, 0, 0); ++ to_charmap = charmap_read (to_code, /*0, 1,*/1, 0, 0, 0); + + + /* At this point we have to handle two cases. The first one is +@@ -201,9 +168,25 @@ main (int argc, char *argv[]) + argc, remaining, argv, output_file); + else + { ++ struct gconv_spec conv_spec; ++ int res; ++ ++ if (__gconv_create_spec (&conv_spec, from_code, to_code) == NULL) ++ { ++ error (EXIT_FAILURE, errno, ++ _("failed to start conversion processing")); ++ exit (1); ++ } ++ ++ if (omit_invalid) ++ conv_spec.ignore = true; ++ + /* Let's see whether we have these coded character sets. */ +- cd = iconv_open (to_code, from_code); +- if (cd == (iconv_t) -1) ++ res = __gconv_open (&conv_spec, &cd, 0); ++ ++ gconv_destroy_spec (&conv_spec); ++ ++ if (res != __GCONV_OK) + { + if (errno == EINVAL) + { +@@ -221,7 +204,7 @@ main (int argc, char *argv[]) + const char *from_pretty = + (from_code[0] ? from_code : nl_langinfo (CODESET)); + const char *to_pretty = +- (orig_to_code[0] ? orig_to_code : nl_langinfo (CODESET)); ++ (to_code[0] ? to_code : nl_langinfo (CODESET)); + + if (from_wrong) + { +diff --git a/iconv/tst-iconv-opt.c b/iconv/tst-iconv-opt.c +new file mode 100644 +index 0000000000..669d812a6a +--- /dev/null ++++ b/iconv/tst-iconv-opt.c +@@ -0,0 +1,347 @@ ++/* Test iconv's TRANSLIT and IGNORE option handling ++ ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++/* Run one iconv test. Arguments: ++ to: destination character set and options ++ from: source character set ++ input: input string to be converted ++ exp_in: expected number of bytes consumed ++ exp_ret: expected return value (error or number of irreversible conversions) ++ exp_out: expected output string ++ exp_err: expected value of `errno' after iconv returns. */ ++static void ++test_iconv (const char *to, const char *from, char *input, size_t exp_in, ++ size_t exp_ret, const char *exp_out, int exp_err) ++{ ++ iconv_t cd; ++ char outbuf[500]; ++ size_t inlen, outlen; ++ char *inptr, *outptr; ++ size_t n; ++ ++ cd = iconv_open (to, from); ++ TEST_VERIFY (cd != (iconv_t) -1); ++ ++ inlen = strlen (input); ++ outlen = sizeof (outbuf); ++ inptr = input; ++ outptr = outbuf; ++ ++ errno = 0; ++ n = iconv (cd, &inptr, &inlen, &outptr, &outlen); ++ ++ TEST_COMPARE (n, exp_ret); ++ TEST_VERIFY (inptr == input + exp_in); ++ TEST_COMPARE (errno, exp_err); ++ TEST_COMPARE_BLOB (outbuf, outptr - outbuf, exp_out, strlen (exp_out)); ++ TEST_VERIFY (iconv_close (cd) == 0); ++} ++ ++ ++/* We test option parsing by converting UTF-8 inputs to ASCII under various ++ option combinations. The UTF-8 inputs fall into three categories: ++ - ASCII-only, ++ - non-ASCII, ++ - non-ASCII with invalid UTF-8 characters. */ ++ ++/* 1. */ ++char ascii[] = "Just some ASCII text"; ++ ++/* 2. Valid UTF-8 input and some corresponding expected outputs with various ++ options. The two non-ASCII characters below are accented alphabets: ++ an `a' then an `o'. */ ++char utf8[] = "UTF-8 text with \u00E1 couple \u00F3f non-ASCII characters"; ++char u2a[] = "UTF-8 text with "; ++char u2a_translit[] = "UTF-8 text with a couple of non-ASCII characters"; ++char u2a_ignore[] = "UTF-8 text with couple f non-ASCII characters"; ++ ++/* 3. Invalid UTF-8 input and some corresponding expected outputs. \xff is ++ invalid UTF-8. It's followed by some valid but non-ASCII UTF-8. */ ++char iutf8[] = "Invalid UTF-8 \xff\u27E6text\u27E7"; ++char iu2a[] = "Invalid UTF-8 "; ++char iu2a_ignore[] = "Invalid UTF-8 text"; ++char iu2a_both[] = "Invalid UTF-8 [|text|]"; ++ ++/* 4. Another invalid UTF-8 input and corresponding expected outputs. This time ++ the valid non-ASCII UTF-8 characters appear before the invalid \xff. */ ++char jutf8[] = "Invalid \u27E6UTF-8\u27E7 \xfftext"; ++char ju2a[] = "Invalid "; ++char ju2a_translit[] = "Invalid [|UTF-8|] "; ++char ju2a_ignore[] = "Invalid UTF-8 text"; ++char ju2a_both[] = "Invalid [|UTF-8|] text"; ++ ++/* We also test option handling for character set names that have the form ++ "A/B". In this test, we test conversions "ISO-10646/UTF-8", and either ++ ISO-8859-1 or ASCII. */ ++ ++/* 5. Accented 'A' and 'a' characters in ISO-8859-1 and UTF-8, and an ++ equivalent ASCII transliteration. */ ++char iso8859_1_a[] = {0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, /* Accented A's. */ ++ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, /* Accented a's. */ ++ 0x00}; ++char utf8_a[] = "\u00C0\u00C1\u00C2\u00C3\u00C4\u00C5" ++ "\u00E0\u00E1\u00E2\u00E3\u00E4\u00E5"; ++char ascii_a[] = "AAAAAAaaaaaa"; ++ ++/* 6. An invalid ASCII string where [0] is invalid and [1] is '~'. */ ++char iascii [] = {0x80, '~', '\0'}; ++char empty[] = ""; ++char ia2u_ignore[] = "~"; ++ ++static int ++do_test (void) ++{ ++ xsetlocale (LC_ALL, "en_US.UTF-8"); ++ ++ ++ /* 0. iconv_open should gracefully fail for invalid character sets. */ ++ ++ TEST_VERIFY (iconv_open ("INVALID", "UTF-8") == (iconv_t) -1); ++ TEST_VERIFY (iconv_open ("UTF-8", "INVALID") == (iconv_t) -1); ++ TEST_VERIFY (iconv_open ("INVALID", "INVALID") == (iconv_t) -1); ++ ++ ++ /* 1. ASCII-only UTF-8 input should convert to ASCII with no changes: */ ++ ++ test_iconv ("ASCII", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); ++ test_iconv ("ASCII//", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); ++ test_iconv ("ASCII//TRANSLIT", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); ++ test_iconv ("ASCII//TRANSLIT//", "UTF-8", ascii, strlen (ascii), 0, ascii, ++ 0); ++ test_iconv ("ASCII//IGNORE", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); ++ test_iconv ("ASCII//IGNORE//", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); ++ ++ ++ /* 2. Valid UTF-8 input with non-ASCII characters: */ ++ ++ /* EILSEQ when converted to ASCII. */ ++ test_iconv ("ASCII", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, EILSEQ); ++ ++ /* Converted without error with TRANSLIT enabled. */ ++ test_iconv ("ASCII//TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, u2a_translit, ++ 0); ++ ++ /* EILSEQ with IGNORE enabled. Non-ASCII chars dropped from output. */ ++ test_iconv ("ASCII//IGNORE", "UTF-8", utf8, strlen (utf8), (size_t) -1, ++ u2a_ignore, EILSEQ); ++ ++ /* With TRANSLIT and IGNORE enabled, transliterated without error. We test ++ four combinations. */ ++ ++ test_iconv ("ASCII//TRANSLIT,IGNORE", "UTF-8", utf8, strlen (utf8), 2, ++ u2a_translit, 0); ++ test_iconv ("ASCII//TRANSLIT//IGNORE", "UTF-8", utf8, strlen (utf8), 2, ++ u2a_translit, 0); ++ test_iconv ("ASCII//IGNORE,TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, ++ u2a_translit, 0); ++ /* Due to bug 19519, iconv was ignoring TRANSLIT for the following input. */ ++ test_iconv ("ASCII//IGNORE//TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, ++ u2a_translit, 0); ++ ++ /* Misspellings of TRANSLIT and IGNORE are ignored, but conversion still ++ works while respecting any other correctly spelled options. */ ++ ++ test_iconv ("ASCII//T", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, ++ EILSEQ); ++ test_iconv ("ASCII//TRANSLITERATE", "UTF-8", utf8, strlen (u2a), (size_t) -1, ++ u2a, EILSEQ); ++ test_iconv ("ASCII//I", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, ++ EILSEQ); ++ test_iconv ("ASCII//IGNORED", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, ++ EILSEQ); ++ test_iconv ("ASCII//TRANSLITERATE//IGNORED", "UTF-8", utf8, strlen (u2a), ++ (size_t) -1, u2a, EILSEQ); ++ test_iconv ("ASCII//IGNORED,TRANSLITERATE", "UTF-8", utf8, strlen (u2a), ++ (size_t) -1, u2a, EILSEQ); ++ test_iconv ("ASCII//T//I", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, ++ EILSEQ); ++ ++ test_iconv ("ASCII//TRANSLIT//I", "UTF-8", utf8, strlen (utf8), 2, ++ u2a_translit, 0); ++ /* Due to bug 19519, iconv was ignoring TRANSLIT for the following input. */ ++ test_iconv ("ASCII//I//TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, ++ u2a_translit, 0); ++ test_iconv ("ASCII//IGNORED,TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, ++ u2a_translit, 0); ++ test_iconv ("ASCII//TRANSLIT,IGNORED", "UTF-8", utf8, strlen (utf8), 2, ++ u2a_translit, 0); ++ ++ test_iconv ("ASCII//IGNORE,T", "UTF-8", utf8, strlen (utf8), (size_t) -1, ++ u2a_ignore, EILSEQ); ++ test_iconv ("ASCII//T,IGNORE", "UTF-8", utf8, strlen (utf8), (size_t) -1, ++ u2a_ignore, EILSEQ); ++ /* Due to bug 19519, iconv was ignoring IGNORE for the following input. */ ++ test_iconv ("ASCII//TRANSLITERATE//IGNORE", "UTF-8", utf8, strlen (utf8), ++ (size_t) -1, u2a_ignore, EILSEQ); ++ test_iconv ("ASCII//IGNORE//TRANSLITERATE", "UTF-8", utf8, strlen (utf8), ++ (size_t) -1, u2a_ignore, EILSEQ); ++ ++ ++ /* 3. Invalid UTF-8 followed by some valid non-ASCII UTF-8 characters: */ ++ ++ /* EILSEQ; output is truncated at the first invalid UTF-8 character. */ ++ test_iconv ("ASCII", "UTF-8", iutf8, strlen (iu2a), (size_t) -1, iu2a, ++ EILSEQ); ++ ++ /* With TRANSLIT enabled: EILSEQ; output still truncated at the first invalid ++ UTF-8 character. */ ++ test_iconv ("ASCII//TRANSLIT", "UTF-8", iutf8, strlen (iu2a), (size_t) -1, ++ iu2a, EILSEQ); ++ ++ /* With IGNORE enabled: EILSEQ; output omits invalid UTF-8 characters and ++ valid UTF-8 non-ASCII characters. */ ++ test_iconv ("ASCII//IGNORE", "UTF-8", iutf8, strlen (iutf8), (size_t) -1, ++ iu2a_ignore, EILSEQ); ++ ++ /* With TRANSLIT and IGNORE enabled, output omits only invalid UTF-8 ++ characters and transliterates valid non-ASCII UTF-8 characters. We test ++ four combinations. */ ++ ++ test_iconv ("ASCII//TRANSLIT,IGNORE", "UTF-8", iutf8, strlen (iutf8), 2, ++ iu2a_both, 0); ++ /* Due to bug 19519, iconv was ignoring IGNORE for the following input. */ ++ test_iconv ("ASCII//TRANSLIT//IGNORE", "UTF-8", iutf8, strlen (iutf8), 2, ++ iu2a_both, 0); ++ test_iconv ("ASCII//IGNORE,TRANSLIT", "UTF-8", iutf8, strlen (iutf8), 2, ++ iu2a_both, 0); ++ /* Due to bug 19519, iconv was ignoring TRANSLIT for the following input. */ ++ test_iconv ("ASCII//IGNORE//TRANSLIT", "UTF-8", iutf8, strlen (iutf8), 2, ++ iu2a_both, 0); ++ ++ ++ /* 4. Invalid UTF-8 with valid non-ASCII UTF-8 chars appearing first: */ ++ ++ /* EILSEQ; output is truncated at the first non-ASCII character. */ ++ test_iconv ("ASCII", "UTF-8", jutf8, strlen (ju2a), (size_t) -1, ju2a, ++ EILSEQ); ++ ++ /* With TRANSLIT enabled: EILSEQ; output now truncated at the first invalid ++ UTF-8 character. */ ++ test_iconv ("ASCII//TRANSLIT", "UTF-8", jutf8, strlen (jutf8) - 5, ++ (size_t) -1, ju2a_translit, EILSEQ); ++ test_iconv ("ASCII//translit", "UTF-8", jutf8, strlen (jutf8) - 5, ++ (size_t) -1, ju2a_translit, EILSEQ); ++ ++ /* With IGNORE enabled: EILSEQ; output omits invalid UTF-8 characters and ++ valid UTF-8 non-ASCII characters. */ ++ test_iconv ("ASCII//IGNORE", "UTF-8", jutf8, strlen (jutf8), (size_t) -1, ++ ju2a_ignore, EILSEQ); ++ test_iconv ("ASCII//ignore", "UTF-8", jutf8, strlen (jutf8), (size_t) -1, ++ ju2a_ignore, EILSEQ); ++ ++ /* With TRANSLIT and IGNORE enabled, output omits only invalid UTF-8 ++ characters and transliterates valid non-ASCII UTF-8 characters. We test ++ several combinations. */ ++ ++ test_iconv ("ASCII//TRANSLIT,IGNORE", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ /* Due to bug 19519, iconv was ignoring IGNORE for the following input. */ ++ test_iconv ("ASCII//TRANSLIT//IGNORE", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ test_iconv ("ASCII//IGNORE,TRANSLIT", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ /* Due to bug 19519, iconv was ignoring TRANSLIT for the following input. */ ++ test_iconv ("ASCII//IGNORE//TRANSLIT", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ test_iconv ("ASCII//translit,ignore", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ /* Trailing whitespace and separators should be ignored. */ ++ test_iconv ("ASCII//IGNORE,TRANSLIT ", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ test_iconv ("ASCII//IGNORE,TRANSLIT/", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ test_iconv ("ASCII//IGNORE,TRANSLIT//", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ test_iconv ("ASCII//IGNORE,TRANSLIT,", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ test_iconv ("ASCII//IGNORE,TRANSLIT,,", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ test_iconv ("ASCII//IGNORE,TRANSLIT /,", "UTF-8", jutf8, strlen (jutf8), 2, ++ ju2a_both, 0); ++ ++ /* TRANSLIT or IGNORE suffixes in fromcode should be ignored. */ ++ test_iconv ("ASCII", "UTF-8//TRANSLIT", jutf8, strlen (ju2a), (size_t) -1, ++ ju2a, EILSEQ); ++ test_iconv ("ASCII", "UTF-8//IGNORE", jutf8, strlen (ju2a), (size_t) -1, ++ ju2a, EILSEQ); ++ test_iconv ("ASCII", "UTF-8//TRANSLIT,IGNORE", jutf8, strlen (ju2a), ++ (size_t) -1, ju2a, EILSEQ); ++ ++ ++ /* 5. Charset names of the form "A/B/": */ ++ ++ /* ISO-8859-1 is converted to UTF-8 without needing transliteration. */ ++ test_iconv ("ISO-10646/UTF-8", "ISO-8859-1", iso8859_1_a, ++ strlen (iso8859_1_a), 0, utf8_a, 0); ++ test_iconv ("ISO-10646/UTF-8/", "ISO-8859-1", iso8859_1_a, ++ strlen (iso8859_1_a), 0, utf8_a, 0); ++ test_iconv ("ISO-10646/UTF-8/IGNORE", "ISO-8859-1", iso8859_1_a, ++ strlen (iso8859_1_a), 0, utf8_a, 0); ++ test_iconv ("ISO-10646/UTF-8//IGNORE", "ISO-8859-1", iso8859_1_a, ++ strlen (iso8859_1_a), 0, utf8_a, 0); ++ test_iconv ("ISO-10646/UTF-8/TRANSLIT", "ISO-8859-1", iso8859_1_a, ++ strlen (iso8859_1_a), 0, utf8_a, 0); ++ test_iconv ("ISO-10646/UTF-8//TRANSLIT", "ISO-8859-1", iso8859_1_a, ++ strlen (iso8859_1_a), 0, utf8_a, 0); ++ test_iconv ("ISO-10646/UTF-8//TRANSLIT/IGNORE", "ISO-8859-1", iso8859_1_a, ++ strlen (iso8859_1_a), 0, utf8_a, 0); ++ test_iconv ("ISO-10646/UTF-8//TRANSLIT//IGNORE", "ISO-8859-1", iso8859_1_a, ++ strlen (iso8859_1_a), 0, utf8_a, 0); ++ test_iconv ("ISO-10646/UTF-8/TRANSLIT,IGNORE", "ISO-8859-1", iso8859_1_a, ++ strlen (iso8859_1_a), 0, utf8_a, 0); ++ ++ /* UTF-8 with accented A's is converted to ASCII with transliteration. */ ++ test_iconv ("ASCII", "ISO-10646/UTF-8", utf8_a, ++ 0, (size_t) -1, empty, EILSEQ); ++ test_iconv ("ASCII//IGNORE", "ISO-10646/UTF-8", utf8_a, ++ strlen (utf8_a), (size_t) -1, empty, EILSEQ); ++ test_iconv ("ASCII//TRANSLIT", "ISO-10646/UTF-8", utf8_a, ++ strlen (utf8_a), 12, ascii_a, 0); ++ ++ /* Invalid ASCII is converted to UTF-8 only with IGNORE. */ ++ test_iconv ("ISO-10646/UTF-8", "ASCII", iascii, strlen (empty), (size_t) -1, ++ empty, EILSEQ); ++ test_iconv ("ISO-10646/UTF-8/TRANSLIT", "ASCII", iascii, strlen (empty), ++ (size_t) -1, empty, EILSEQ); ++ test_iconv ("ISO-10646/UTF-8/IGNORE", "ASCII", iascii, strlen (iascii), ++ (size_t) -1, ia2u_ignore, EILSEQ); ++ test_iconv ("ISO-10646/UTF-8/TRANSLIT,IGNORE", "ASCII", iascii, ++ strlen (iascii), (size_t) -1, ia2u_ignore, EILSEQ); ++ /* Due to bug 19519, iconv was ignoring IGNORE for the following three ++ inputs: */ ++ test_iconv ("ISO-10646/UTF-8/TRANSLIT/IGNORE", "ASCII", iascii, ++ strlen (iascii), (size_t) -1, ia2u_ignore, EILSEQ); ++ test_iconv ("ISO-10646/UTF-8//TRANSLIT,IGNORE", "ASCII", iascii, ++ strlen (iascii), (size_t) -1, ia2u_ignore, EILSEQ); ++ test_iconv ("ISO-10646/UTF-8//TRANSLIT//IGNORE", "ASCII", iascii, ++ strlen (iascii), (size_t) -1, ia2u_ignore, EILSEQ); ++ ++ return 0; ++} ++ ++#include +diff --git a/iconv/tst-iconv_prog.sh b/iconv/tst-iconv_prog.sh +new file mode 100644 +index 0000000000..8298136b7f +--- /dev/null ++++ b/iconv/tst-iconv_prog.sh +@@ -0,0 +1,280 @@ ++#!/bin/bash ++# Test for some known iconv(1) hangs from bug 19519, and miscellaneous ++# iconv(1) program error conditions. ++# Copyright (C) 2020 Free Software Foundation, Inc. ++# This file is part of the GNU C Library. ++ ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. ++ ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++ ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# . ++ ++codir=$1 ++test_wrapper_env="$2" ++run_program_env="$3" ++ ++# We have to have some directories in the library path. ++LIBPATH=$codir:$codir/iconvdata ++ ++# How the start the iconv(1) program. $from is not defined/expanded yet. ++ICONV=' ++$codir/elf/ld.so --library-path $LIBPATH --inhibit-rpath ${from}.so ++$codir/iconv/iconv_prog ++' ++ICONV="$test_wrapper_env $run_program_env $ICONV" ++ ++# List of known hangs; ++# Gathered by running an exhaustive 2 byte input search against glibc-2.28 ++hangarray=( ++"\x00\x23;-c;ANSI_X3.110;UTF-8//TRANSLIT//IGNORE" ++"\x00\xa1;-c;ARMSCII-8;UTF-8//TRANSLIT//IGNORE" ++"\x00\xa1;-c;ASMO_449;UTF-8//TRANSLIT//IGNORE" ++"\x00\x81;-c;BIG5;UTF-8//TRANSLIT//IGNORE" ++"\x00\xff;-c;BIG5HKSCS;UTF-8//TRANSLIT//IGNORE" ++"\x00\xff;-c;BRF;UTF-8//TRANSLIT//IGNORE" ++"\x00\xff;-c;BS_4730;UTF-8//TRANSLIT//IGNORE" ++"\x00\x81;-c;CP1250;UTF-8//TRANSLIT//IGNORE" ++"\x00\x98;-c;CP1251;UTF-8//TRANSLIT//IGNORE" ++"\x00\x81;-c;CP1252;UTF-8//TRANSLIT//IGNORE" ++"\x00\x81;-c;CP1253;UTF-8//TRANSLIT//IGNORE" ++"\x00\x81;-c;CP1254;UTF-8//TRANSLIT//IGNORE" ++"\x00\x81;-c;CP1255;UTF-8//TRANSLIT//IGNORE" ++"\x00\x81;-c;CP1257;UTF-8//TRANSLIT//IGNORE" ++"\x00\x81;-c;CP1258;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;CP932;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;CSA_Z243.4-1985-1;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;CSA_Z243.4-1985-2;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;DEC-MCS;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;DIN_66003;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;DS_2089;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-AT-DE;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-AT-DE-A;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-CA-FR;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-DK-NO;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-DK-NO-A;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-ES;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-ES-A;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-ES-S;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-FI-SE;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-FI-SE-A;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-FR;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-IS-FRISS;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-IT;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-PT;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-UK;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;EBCDIC-US;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ES;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ES2;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;EUC-CN;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;EUC-JISX0213;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;EUC-JP;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;EUC-JP-MS;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;EUC-KR;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;EUC-TW;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;GB18030;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;GB_1988-80;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;GBK;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;GOST_19768-74;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;GREEK7;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;GREEK7-OLD;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;GREEK-CCITT;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;HP-GREEK8;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;HP-ROMAN8;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;HP-ROMAN9;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;HP-THAI8;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;HP-TURKISH8;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM038;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;IBM1004;UTF-8//TRANSLIT//IGNORE" ++"\x00\xff;-c;IBM1008;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;IBM1046;UTF-8//TRANSLIT//IGNORE" ++"\x00\x51;-c;IBM1132;UTF-8//TRANSLIT//IGNORE" ++"\x00\xa0;-c;IBM1133;UTF-8//TRANSLIT//IGNORE" ++"\x00\xce;-c;IBM1137;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;IBM1161;UTF-8//TRANSLIT//IGNORE" ++"\x00\xdb;-c;IBM1162;UTF-8//TRANSLIT//IGNORE" ++"\x00\x70;-c;IBM12712;UTF-8//TRANSLIT//IGNORE" ++# These are known hangs that are yet to be fixed: ++# "\x00\x0f;-c;IBM1364;UTF-8" ++# "\x00\x0f;-c;IBM1371;UTF-8" ++# "\x00\x0f;-c;IBM1388;UTF-8" ++# "\x00\x0f;-c;IBM1390;UTF-8" ++# "\x00\x0f;-c;IBM1399;UTF-8" ++"\x00\x53;-c;IBM16804;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM274;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM275;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM281;UTF-8//TRANSLIT//IGNORE" ++"\x00\x57;-c;IBM290;UTF-8//TRANSLIT//IGNORE" ++"\x00\x45;-c;IBM420;UTF-8//TRANSLIT//IGNORE" ++"\x00\x68;-c;IBM423;UTF-8//TRANSLIT//IGNORE" ++"\x00\x70;-c;IBM424;UTF-8//TRANSLIT//IGNORE" ++"\x00\x53;-c;IBM4517;UTF-8//TRANSLIT//IGNORE" ++"\x00\x53;-c;IBM4899;UTF-8//TRANSLIT//IGNORE" ++"\x00\xa5;-c;IBM4909;UTF-8//TRANSLIT//IGNORE" ++"\x00\xdc;-c;IBM4971;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM803;UTF-8//TRANSLIT//IGNORE" ++"\x00\x91;-c;IBM851;UTF-8//TRANSLIT//IGNORE" ++"\x00\x9b;-c;IBM856;UTF-8//TRANSLIT//IGNORE" ++"\x00\xd5;-c;IBM857;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;IBM864;UTF-8//TRANSLIT//IGNORE" ++"\x00\x94;-c;IBM868;UTF-8//TRANSLIT//IGNORE" ++"\x00\x94;-c;IBM869;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;IBM874;UTF-8//TRANSLIT//IGNORE" ++"\x00\x6a;-c;IBM875;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM880;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;IBM891;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;IBM903;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;IBM904;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM905;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;IBM9066;UTF-8//TRANSLIT//IGNORE" ++"\x00\x48;-c;IBM918;UTF-8//TRANSLIT//IGNORE" ++"\x00\x57;-c;IBM930;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;IBM932;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM933;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM935;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM937;UTF-8//TRANSLIT//IGNORE" ++"\x00\x41;-c;IBM939;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;IBM943;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;INIS;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;INIS-8;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;INIS-CYRILLIC;UTF-8//TRANSLIT//IGNORE" ++"\x00\xec;-c;ISIRI-3342;UTF-8//TRANSLIT//IGNORE" ++"\x00\xec;-c;ISO_10367-BOX;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO-2022-CN;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO-2022-CN-EXT;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO-2022-JP;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO-2022-JP-2;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO-2022-JP-3;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO-2022-KR;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO_2033;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO_5427;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO_5427-EXT;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO_5428;UTF-8//TRANSLIT//IGNORE" ++"\x00\xa4;-c;ISO_6937;UTF-8//TRANSLIT//IGNORE" ++"\x00\xa0;-c;ISO_6937-2;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO-8859-11;UTF-8//TRANSLIT//IGNORE" ++"\x00\xa5;-c;ISO-8859-3;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO-8859-6;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO-8859-7;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;ISO-8859-8;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;ISO-IR-197;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;ISO-IR-209;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;IT;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;JIS_C6220-1969-RO;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;JIS_C6229-1984-B;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;JOHAB;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;JUS_I.B1.002;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;KOI-8;UTF-8//TRANSLIT//IGNORE" ++"\x00\x88;-c;KOI8-T;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;KSC5636;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;LATIN-GREEK;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;LATIN-GREEK-1;UTF-8//TRANSLIT//IGNORE" ++"\x00\xf6;-c;MAC-IS;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;MSZ_7795.3;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;NATS-DANO;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;NATS-SEFI;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;NC_NC00-10;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;NF_Z_62-010;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;NF_Z_62-010_1973;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;NS_4551-1;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;NS_4551-2;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;PT;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;PT2;UTF-8//TRANSLIT//IGNORE" ++"\x00\x98;-c;RK1048;UTF-8//TRANSLIT//IGNORE" ++"\x00\x98;-c;SEN_850200_B;UTF-8//TRANSLIT//IGNORE" ++"\x00\x98;-c;SEN_850200_C;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;Shift_JISX0213;UTF-8//TRANSLIT//IGNORE" ++"\x00\x80;-c;SJIS;UTF-8//TRANSLIT//IGNORE" ++"\x00\x23;-c;T.61-8BIT;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;TIS-620;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;TSCII;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;UHC;UTF-8//TRANSLIT//IGNORE" ++"\x00\xd8;-c;UNICODE;UTF-8//TRANSLIT//IGNORE" ++"\x00\xdc;-c;UTF-16;UTF-8//TRANSLIT//IGNORE" ++"\xdc\x00;-c;UTF-16BE;UTF-8//TRANSLIT//IGNORE" ++"\x00\xdc;-c;UTF-16LE;UTF-8//TRANSLIT//IGNORE" ++"\xff\xff;-c;UTF-7;UTF-8//TRANSLIT//IGNORE" ++"\x00\x81;-c;WIN-SAMI-2;UTF-8//TRANSLIT//IGNORE" ++) ++ ++# List of option combinations that *should* lead to an error ++errorarray=( ++# Converting from/to invalid character sets should cause error ++"\x00\x00;;INVALID;INVALID" ++"\x00\x00;;INVALID;UTF-8" ++"\x00\x00;;UTF-8;INVALID" ++) ++ ++# Requires $twobyte input, $c flag, $from, and $to to be set; sets $ret ++execute_test () ++{ ++ eval PROG=\"$ICONV\" ++ echo -en "$twobyte" \ ++ | timeout -k 4 3 $PROG $c -f $from -t "$to" &>/dev/null ++ ret=$? ++} ++ ++check_hangtest_result () ++{ ++ if [ "$ret" -eq "124" ] || [ "$ret" -eq "137" ]; then # timeout/hang ++ result="HANG" ++ else ++ if [ "$ret" -eq "139" ]; then # segfault ++ result="SEGFAULT" ++ else ++ if [ "$ret" -gt "127" ]; then # unexpected error ++ result="UNEXPECTED" ++ else ++ result="OK" ++ fi ++ fi ++ fi ++ ++ echo -n "$result: from: \"$from\", to: \"$to\"," ++ echo " input \"$twobyte\", flags \"$c\"" ++ ++ if [ "$result" != "OK" ]; then ++ exit 1 ++ fi ++} ++ ++for hangcommand in "${hangarray[@]}"; do ++ twobyte="$(echo "$hangcommand" | cut -d";" -f 1)" ++ c="$(echo "$hangcommand" | cut -d";" -f 2)" ++ from="$(echo "$hangcommand" | cut -d";" -f 3)" ++ to="$(echo "$hangcommand" | cut -d";" -f 4)" ++ execute_test ++ check_hangtest_result ++done ++ ++check_errtest_result () ++{ ++ if [ "$ret" -eq "1" ]; then # we errored out as expected ++ result="PASS" ++ else ++ result="FAIL" ++ fi ++ echo -n "$result: from: \"$from\", to: \"$to\"," ++ echo " input \"$twobyte\", flags \"$c\", return code $ret" ++ ++ if [ "$result" != "PASS" ]; then ++ exit 1 ++ fi ++} ++ ++for errorcommand in "${errorarray[@]}"; do ++ twobyte="$(echo "$errorcommand" | cut -d";" -f 1)" ++ c="$(echo "$errorcommand" | cut -d";" -f 2)" ++ from="$(echo "$errorcommand" | cut -d";" -f 3)" ++ to="$(echo "$errorcommand" | cut -d";" -f 4)" ++ execute_test ++ check_errtest_result ++done +diff --git a/intl/dcigettext.c b/intl/dcigettext.c +index 465c8df34c..2e7c662bc7 100644 +--- a/intl/dcigettext.c ++++ b/intl/dcigettext.c +@@ -1119,11 +1119,16 @@ _nl_find_msg (struct loaded_l10nfile *domain_file, + outcharset = encoding; + + # ifdef _LIBC +- /* We always want to use transliteration. */ +- outcharset = norm_add_slashes (outcharset, "TRANSLIT"); +- charset = norm_add_slashes (charset, ""); +- int r = __gconv_open (outcharset, charset, &convd->conv, +- GCONV_AVOID_NOCONV); ++ ++ struct gconv_spec conv_spec ++ = { .fromcode = norm_add_slashes (charset, ""), ++ .tocode = norm_add_slashes (outcharset, ""), ++ /* We always want to use transliteration. */ ++ .translit = true, ++ .ignore = false ++ }; ++ int r = __gconv_open (&conv_spec, &convd->conv, ++ GCONV_AVOID_NOCONV); + if (__builtin_expect (r != __GCONV_OK, 0)) + { + /* If the output encoding is the same there is +-- +2.27.0 diff --git a/glibc.spec b/glibc.spec index 88257b21bbaf1ee978656aa3a997f3024fdd1e61..6cf45185936e6cc5b46ffe6cae12043b46967e46 100644 --- a/glibc.spec +++ b/glibc.spec @@ -59,7 +59,7 @@ ############################################################################## Name: glibc Version: 2.28 -Release: 82 +Release: 83 Summary: The GNU libc libraries License: %{all_license} URL: http://www.gnu.org/software/glibc/ @@ -141,6 +141,9 @@ Patch57: backport-gconv-Do-not-emit-spurious-NUL-character-in-ISO-2022.patch Patch58: backport-nss-make-sure-startp_initialized-do-first.patch Patch59: backport-glibc-fix-CVE-2019-1010023.patch Patch60: backport-fix-pthread_create-and-dlopen-racing.patch +Patch61: revert-Add-miss-brackets.patch +Patch62: Rewrite-iconv-option-parsing-BZ-19519.patch +Patch63: intl-Handle-translation-output-codesets-with-suffixes-BZ-26383.patch Provides: ldconfig rtld(GNU_HASH) bundled(gnulib) @@ -1165,6 +1168,11 @@ fi %doc hesiod/README.hesiod %changelog +* Fri Dec 3 2021 Yang yanchao - 2.28-83 +- iconv: revert Add missing brackets + Rewrite iconv option parsing [BZ #19519] + intl: Handle translation output codesets with suffixes [BZ #26383] + * Thu Dec 2 2021 xujing - 2.28-82 - elf: fix pthread_create and dlopen racing diff --git a/intl-Handle-translation-output-codesets-with-suffixes-BZ-26383.patch b/intl-Handle-translation-output-codesets-with-suffixes-BZ-26383.patch new file mode 100644 index 0000000000000000000000000000000000000000..38aff9b6dc3cf5dc65e13f1e503cb4e671ed212f --- /dev/null +++ b/intl-Handle-translation-output-codesets-with-suffixes-BZ-26383.patch @@ -0,0 +1,247 @@ +From 7d4ec75e111291851620c6aa2c4460647b7fd50d Mon Sep 17 00:00:00 2001 +From: Arjun Shankar +Date: Fri, 25 Sep 2020 14:47:06 +0200 +Subject: [PATCH] intl: Handle translation output codesets with suffixes [BZ + #26383] + +Commit 91927b7c7643 (Rewrite iconv option parsing [BZ #19519]) did not +handle cases where the output codeset for translations (via the `gettext' +family of functions) might have a caller specified encoding suffix such as +TRANSLIT or IGNORE. This led to a regression where translations did not +work when the codeset had a suffix. + +This commit fixes the above issue by parsing any suffixes passed to +__dcigettext and adds two new test-cases to intl/tst-codeset.c to +verify correct behaviour. The iconv-internal function __gconv_create_spec +and the static iconv-internal function gconv_destroy_spec are now visible +internally within glibc and used in intl/dcigettext.c. +--- + iconv/Versions | 4 +++- + iconv/gconv_charset.c | 10 ++++++++++ + iconv/gconv_charset.h | 27 --------------------------- + iconv/gconv_int.h | 21 +++++++++++++++++++++ + iconv/iconv_open.c | 2 +- + iconv/iconv_prog.c | 2 +- + intl/dcigettext.c | 17 ++++++++++------- + intl/tst-codeset.c | 34 ++++++++++++++-------------------- + 8 files changed, 60 insertions(+), 57 deletions(-) + +diff --git a/iconv/Versions b/iconv/Versions +index 8a5f4cf780..d51af52fa3 100644 +--- a/iconv/Versions ++++ b/iconv/Versions +@@ -6,7 +6,9 @@ libc { + GLIBC_PRIVATE { + # functions shared with iconv program + __gconv_get_alias_db; __gconv_get_cache; __gconv_get_modules_db; +- __gconv_open; __gconv_create_spec; ++ ++ # functions used elsewhere in glibc ++ __gconv_open; __gconv_create_spec; __gconv_destroy_spec; + + # function used by the gconv modules + __gconv_transliterate; +diff --git a/iconv/gconv_charset.c b/iconv/gconv_charset.c +index 6ccd0773cc..4ba0aa99f5 100644 +--- a/iconv/gconv_charset.c ++++ b/iconv/gconv_charset.c +@@ -216,3 +216,13 @@ out: + return ret; + } + libc_hidden_def (__gconv_create_spec) ++ ++ ++void ++__gconv_destroy_spec (struct gconv_spec *conv_spec) ++{ ++ free (conv_spec->fromcode); ++ free (conv_spec->tocode); ++ return; ++} ++libc_hidden_def (__gconv_destroy_spec) +diff --git a/iconv/gconv_charset.h b/iconv/gconv_charset.h +index b39b09aea1..e9c122cf7e 100644 +--- a/iconv/gconv_charset.h ++++ b/iconv/gconv_charset.h +@@ -48,33 +48,6 @@ + #define GCONV_IGNORE_ERRORS_SUFFIX "IGNORE" + + +-/* This function accepts the charset names of the source and destination of the +- conversion and populates *conv_spec with an equivalent conversion +- specification that may later be used by __gconv_open. The charset names +- might contain options in the form of suffixes that alter the conversion, +- e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring +- and truncating any suffix options in fromcode, and processing and truncating +- any suffix options in tocode. Supported suffix options ("TRANSLIT" or +- "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec +- to be set to true. Unrecognized suffix options are silently discarded. If +- the function succeeds, it returns conv_spec back to the caller. It returns +- NULL upon failure. */ +-struct gconv_spec * +-__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode, +- const char *tocode); +-libc_hidden_proto (__gconv_create_spec) +- +- +-/* This function frees all heap memory allocated by __gconv_create_spec. */ +-static void __attribute__ ((unused)) +-gconv_destroy_spec (struct gconv_spec *conv_spec) +-{ +- free (conv_spec->fromcode); +- free (conv_spec->tocode); +- return; +-} +- +- + /* This function copies in-order, characters from the source 's' that are + either alpha-numeric or one in one of these: "_-.,:/" - into the destination + 'wp' while dropping all other characters. In the process, it converts all +diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h +index e86938dae7..f721ce30ff 100644 +--- a/iconv/gconv_int.h ++++ b/iconv/gconv_int.h +@@ -152,6 +152,27 @@ extern int __gconv_open (struct gconv_spec *conv_spec, + __gconv_t *handle, int flags); + libc_hidden_proto (__gconv_open) + ++/* This function accepts the charset names of the source and destination of the ++ conversion and populates *conv_spec with an equivalent conversion ++ specification that may later be used by __gconv_open. The charset names ++ might contain options in the form of suffixes that alter the conversion, ++ e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring ++ and truncating any suffix options in fromcode, and processing and truncating ++ any suffix options in tocode. Supported suffix options ("TRANSLIT" or ++ "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec ++ to be set to true. Unrecognized suffix options are silently discarded. If ++ the function succeeds, it returns conv_spec back to the caller. It returns ++ NULL upon failure. */ ++extern struct gconv_spec * ++__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode, ++ const char *tocode); ++libc_hidden_proto (__gconv_create_spec) ++ ++/* This function frees all heap memory allocated by __gconv_create_spec. */ ++extern void ++__gconv_destroy_spec (struct gconv_spec *conv_spec); ++libc_hidden_proto (__gconv_destroy_spec) ++ + /* Free resources associated with transformation descriptor CD. */ + extern int __gconv_close (__gconv_t cd) + attribute_hidden; +diff --git a/iconv/iconv_open.c b/iconv/iconv_open.c +index dd54bc12e0..5b30055c04 100644 +--- a/iconv/iconv_open.c ++++ b/iconv/iconv_open.c +@@ -39,7 +39,7 @@ iconv_open (const char *tocode, const char *fromcode) + + int res = __gconv_open (&conv_spec, &cd, 0); + +- gconv_destroy_spec (&conv_spec); ++ __gconv_destroy_spec (&conv_spec); + + if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) + { +diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c +index b4334faa57..d59979759c 100644 +--- a/iconv/iconv_prog.c ++++ b/iconv/iconv_prog.c +@@ -184,7 +184,7 @@ main (int argc, char *argv[]) + /* Let's see whether we have these coded character sets. */ + res = __gconv_open (&conv_spec, &cd, 0); + +- gconv_destroy_spec (&conv_spec); ++ __gconv_destroy_spec (&conv_spec); + + if (res != __GCONV_OK) + { +diff --git a/intl/dcigettext.c b/intl/dcigettext.c +index 2e7c662bc7..bd332e71da 100644 +--- a/intl/dcigettext.c ++++ b/intl/dcigettext.c +@@ -1120,15 +1120,18 @@ _nl_find_msg (struct loaded_l10nfile *domain_file, + + # ifdef _LIBC + +- struct gconv_spec conv_spec +- = { .fromcode = norm_add_slashes (charset, ""), +- .tocode = norm_add_slashes (outcharset, ""), +- /* We always want to use transliteration. */ +- .translit = true, +- .ignore = false +- }; ++ struct gconv_spec conv_spec; ++ ++ __gconv_create_spec (&conv_spec, charset, outcharset); ++ ++ /* We always want to use transliteration. */ ++ conv_spec.translit = true; ++ + int r = __gconv_open (&conv_spec, &convd->conv, + GCONV_AVOID_NOCONV); ++ ++ __gconv_destroy_spec (&conv_spec); ++ + if (__builtin_expect (r != __GCONV_OK, 0)) + { + /* If the output encoding is the same there is +diff --git a/intl/tst-codeset.c b/intl/tst-codeset.c +index fd70432eca..e9f6e5e09f 100644 +--- a/intl/tst-codeset.c ++++ b/intl/tst-codeset.c +@@ -22,13 +22,11 @@ + #include + #include + #include ++#include + + static int + do_test (void) + { +- char *s; +- int result = 0; +- + unsetenv ("LANGUAGE"); + unsetenv ("OUTPUT_CHARSET"); + setlocale (LC_ALL, "de_DE.ISO-8859-1"); +@@ -36,25 +34,21 @@ do_test (void) + bindtextdomain ("codeset", OBJPFX "domaindir"); + + /* Here we expect output in ISO-8859-1. */ +- s = gettext ("cheese"); +- if (strcmp (s, "K\344se")) +- { +- printf ("call 1 returned: %s\n", s); +- result = 1; +- } ++ TEST_COMPARE_STRING (gettext ("cheese"), "K\344se"); + ++ /* Here we expect output in UTF-8. */ + bind_textdomain_codeset ("codeset", "UTF-8"); ++ TEST_COMPARE_STRING (gettext ("cheese"), "K\303\244se"); + +- /* Here we expect output in UTF-8. */ +- s = gettext ("cheese"); +- if (strcmp (s, "K\303\244se")) +- { +- printf ("call 2 returned: %s\n", s); +- result = 1; +- } +- +- return result; ++ /* `a with umlaut' is transliterated to `ae'. */ ++ bind_textdomain_codeset ("codeset", "ASCII//TRANSLIT"); ++ TEST_COMPARE_STRING (gettext ("cheese"), "Kaese"); ++ ++ /* Transliteration also works by default even if not set. */ ++ bind_textdomain_codeset ("codeset", "ASCII"); ++ TEST_COMPARE_STRING (gettext ("cheese"), "Kaese"); ++ ++ return 0; + } + +-#define TEST_FUNCTION do_test () +-#include "../test-skeleton.c" ++#include +-- +2.27.0 diff --git a/revert-Add-miss-brackets.patch b/revert-Add-miss-brackets.patch new file mode 100644 index 0000000000000000000000000000000000000000..838dc2b1f8ed87adb543ebe3ac032df1e7be6d55 --- /dev/null +++ b/revert-Add-miss-brackets.patch @@ -0,0 +1,35 @@ +From f8db68106391e4a7730d2de3a1ab00e7c8e62176 Mon Sep 17 00:00:00 2001 +From: Yang Yanchao +Date: Fri, 3 Dec 2021 17:34:18 +0800 +Subject: [PATCH] revert Add missing brackets. + +revert https://sourceware.org/bugzilla/attachment.cgi?id=9876&action=edit +which from https://sourceware.org/bugzilla/show_bug.cgi?id=19519 +to fix CVE-2016-10228 +but might cause transcode errors may occur in certain scenarios. + + +--- + iconv/iconv_prog.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c +index 67bbcff3..52e9d3f3 100644 +--- a/iconv/iconv_prog.c ++++ b/iconv/iconv_prog.c +@@ -487,11 +487,7 @@ process_block (iconv_t cd, char *addr, size_t len, FILE **output, + if (len == 0) + n = 0; + else +- { +- errno = E2BIG; +- ++addr; +- --len; +- } ++ errno = E2BIG; + } + + if (outptr != outbuf) +-- +2.23.0 +