diff --git a/backport-Add-TEST_COMPARE_STRING_WIDE-to-support-check.h.patch b/backport-Add-TEST_COMPARE_STRING_WIDE-to-support-check.h.patch new file mode 100644 index 0000000000000000000000000000000000000000..e97367b21c3d34e87cb9f0edbab23f088ccbdd40 --- /dev/null +++ b/backport-Add-TEST_COMPARE_STRING_WIDE-to-support-check.h.patch @@ -0,0 +1,757 @@ +From de82cb0da4b8fa5b3d56c457438d2568c67ab1b1 Mon Sep 17 00:00:00 2001 +From: Joseph Myers +Date: Tue, 12 Oct 2021 13:48:39 +0000 +Subject: [PATCH] Add TEST_COMPARE_STRING_WIDE to support/check.h + +I'd like to be able to test narrow and wide string interfaces, with +the narrow string tests using TEST_COMPARE_STRING and the wide string +tests using something analogous (possibly generated using macros from +a common test template for both the narrow and wide string tests where +appropriate). + +Add such a TEST_COMPARE_STRING_WIDE, along with functions +support_quote_blob_wide and support_test_compare_string_wide that it +builds on. Those functions are built using macros from common +templates shared by the narrow and wide string implementations, though +I didn't do that for the tests of test functions. In +support_quote_blob_wide, I chose to use the \x{} delimited escape +sequence syntax proposed for C2X in N2785, rather than e.g. trying to +generate the end of a string and the start of a new string when +ambiguity would result from undelimited \x (when the next character +after such an escape sequence is valid hex) or forcing an escape +sequence to be used for the next character in the case of such +ambiguity. + +Tested for x86_64. 
+Conflict:NA +Reference:https://sourceware.org/git/?p=glibc.git;a=commit;h=de82cb0da4b8fa5b3d56c457438d2568c67ab1b1 +--- + support/Makefile | 4 + + support/check.h | 15 +++ + support/support.h | 6 ++ + support/support_quote_blob.c | 71 ++------------ + support/support_quote_blob_main.c | 88 +++++++++++++++++ + support/support_quote_blob_wide.c | 24 +++++ + support/support_test_compare_string.c | 83 ++-------------- + support/support_test_compare_string_main.c | 94 ++++++++++++++++++ + support/support_test_compare_string_wide.c | 28 ++++++ + support/tst-support_quote_blob_wide.c | 66 +++++++++++++ + support/tst-test_compare_string_wide.c | 107 +++++++++++++++++++++ + 11 files changed, 448 insertions(+), 138 deletions(-) + create mode 100644 support/support_quote_blob_main.c + create mode 100644 support/support_quote_blob_wide.c + create mode 100644 support/support_test_compare_string_main.c + create mode 100644 support/support_test_compare_string_wide.c + create mode 100644 support/tst-support_quote_blob_wide.c + create mode 100644 support/tst-test_compare_string_wide.c + +diff --git a/support/Makefile b/support/Makefile +index fd27c8451e..7f03950914 100644 +--- a/support/Makefile ++++ b/support/Makefile +@@ -71,6 +71,7 @@ libsupport-routines = \ + support_openpty \ + support_paths \ + support_quote_blob \ ++ support_quote_blob_wide \ + support_quote_string \ + support_record_failure \ + support_run_diff \ +@@ -84,6 +85,7 @@ libsupport-routines = \ + support_test_compare_blob \ + support_test_compare_failure \ + support_test_compare_string \ ++ support_test_compare_string_wide \ + support_test_main \ + support_test_verify_impl \ + support_wait_for_thread_exit \ +@@ -270,11 +272,13 @@ tests = \ + tst-support-open-dev-null-range \ + tst-support-process_state \ + tst-support_quote_blob \ ++ tst-support_quote_blob_wide \ + tst-support_quote_string \ + tst-support_record_failure \ + tst-test_compare \ + tst-test_compare_blob \ + tst-test_compare_string \ ++ 
tst-test_compare_string_wide \ + tst-timespec \ + tst-xreadlink \ + tst-xsigstack \ +diff --git a/support/check.h b/support/check.h +index 83662b2d10..9b1844352f 100644 +--- a/support/check.h ++++ b/support/check.h +@@ -20,6 +20,7 @@ + #define SUPPORT_CHECK_H + + #include <sys/cdefs.h> ++#include <stddef.h> + + __BEGIN_DECLS + +@@ -171,11 +172,25 @@ void support_test_compare_blob (const void *left, + (support_test_compare_string (left, right, __FILE__, __LINE__, \ + #left, #right)) + ++/* Compare the wide strings LEFT and RIGHT and report a test failure ++ if they are different. Also report failure if one of the arguments ++ is a null pointer and the other is not. The strings should be ++ reasonably short because on mismatch, both are printed. */ ++#define TEST_COMPARE_STRING_WIDE(left, right) \ ++ (support_test_compare_string_wide (left, right, __FILE__, __LINE__, \ ++ #left, #right)) ++ + void support_test_compare_string (const char *left, const char *right, + const char *file, int line, + const char *left_expr, + const char *right_expr); + ++void support_test_compare_string_wide (const wchar_t *left, ++ const wchar_t *right, ++ const char *file, int line, ++ const char *left_expr, ++ const char *right_expr); ++ + /* Internal function called by the test driver. */ + int support_report_failure (int status) + __attribute__ ((weak, warn_unused_result)); +diff --git a/support/support.h b/support/support.h +index 837a806531..0ee454da6d 100644 +--- a/support/support.h ++++ b/support/support.h +@@ -73,6 +73,12 @@ void support_write_file_string (const char *path, const char *contents); + the result). */ + char *support_quote_blob (const void *blob, size_t length); + ++/* Quote the contents of the wide character array starting at BLOB, of ++ LENGTH wide characters, in such a way that the result string can be ++ included in a C wide string literal (in single/double quotes, ++ without putting the quotes into the result). 
*/ ++char *support_quote_blob_wide (const void *blob, size_t length); ++ + /* Quote the contents of the string, in such a way that the result + string can be included in a C literal (in single/double quotes, + without putting the quotes into the result). */ +diff --git a/support/support_quote_blob.c b/support/support_quote_blob.c +index b5e70125f1..611980c9a2 100644 +--- a/support/support_quote_blob.c ++++ b/support/support_quote_blob.c +@@ -1,4 +1,4 @@ +-/* Quote a blob so that it can be used in C literals. ++/* Quote a narrow string blob so that it can be used in C literals. + Copyright (C) 2018-2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + +@@ -16,68 +16,9 @@ + License along with the GNU C Library; if not, see + . */ + +-#include +-#include ++#define CHAR unsigned char ++#define L_(C) C ++#define SUPPORT_QUOTE_BLOB support_quote_blob ++#define WIDE 0 + +-char * +-support_quote_blob (const void *blob, size_t length) +-{ +- struct xmemstream out; +- xopen_memstream (&out); +- +- const unsigned char *p = blob; +- for (size_t i = 0; i < length; ++i) +- { +- unsigned char ch = p[i]; +- +- /* Use C backslash escapes for those control characters for +- which they are defined. 
*/ +- switch (ch) +- { +- case '\a': +- putc_unlocked ('\\', out.out); +- putc_unlocked ('a', out.out); +- break; +- case '\b': +- putc_unlocked ('\\', out.out); +- putc_unlocked ('b', out.out); +- break; +- case '\f': +- putc_unlocked ('\\', out.out); +- putc_unlocked ('f', out.out); +- break; +- case '\n': +- putc_unlocked ('\\', out.out); +- putc_unlocked ('n', out.out); +- break; +- case '\r': +- putc_unlocked ('\\', out.out); +- putc_unlocked ('r', out.out); +- break; +- case '\t': +- putc_unlocked ('\\', out.out); +- putc_unlocked ('t', out.out); +- break; +- case '\v': +- putc_unlocked ('\\', out.out); +- putc_unlocked ('v', out.out); +- break; +- case '\\': +- case '\'': +- case '\"': +- putc_unlocked ('\\', out.out); +- putc_unlocked (ch, out.out); +- break; +- default: +- if (ch < ' ' || ch > '~') +- /* Use octal sequences because they are fixed width, +- unlike hexadecimal sequences. */ +- fprintf (out.out, "\\%03o", ch); +- else +- putc_unlocked (ch, out.out); +- } +- } +- +- xfclose_memstream (&out); +- return out.buffer; +-} ++#include "support_quote_blob_main.c" +diff --git a/support/support_quote_blob_main.c b/support/support_quote_blob_main.c +new file mode 100644 +index 0000000000..19ccfad593 +--- /dev/null ++++ b/support/support_quote_blob_main.c +@@ -0,0 +1,88 @@ ++/* Quote a blob so that it can be used in C literals. ++ Copyright (C) 2018-2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <support/support.h> ++#include <support/xmemstream.h> ++ ++char * ++SUPPORT_QUOTE_BLOB (const void *blob, size_t length) ++{ ++ struct xmemstream out; ++ xopen_memstream (&out); ++ ++ const CHAR *p = blob; ++ for (size_t i = 0; i < length; ++i) ++ { ++ CHAR ch = p[i]; ++ ++ /* Use C backslash escapes for those control characters for ++ which they are defined. */ ++ switch (ch) ++ { ++ case L_('\a'): ++ putc_unlocked ('\\', out.out); ++ putc_unlocked ('a', out.out); ++ break; ++ case L_('\b'): ++ putc_unlocked ('\\', out.out); ++ putc_unlocked ('b', out.out); ++ break; ++ case L_('\f'): ++ putc_unlocked ('\\', out.out); ++ putc_unlocked ('f', out.out); ++ break; ++ case L_('\n'): ++ putc_unlocked ('\\', out.out); ++ putc_unlocked ('n', out.out); ++ break; ++ case L_('\r'): ++ putc_unlocked ('\\', out.out); ++ putc_unlocked ('r', out.out); ++ break; ++ case L_('\t'): ++ putc_unlocked ('\\', out.out); ++ putc_unlocked ('t', out.out); ++ break; ++ case L_('\v'): ++ putc_unlocked ('\\', out.out); ++ putc_unlocked ('v', out.out); ++ break; ++ case L_('\\'): ++ case L_('\''): ++ case L_('\"'): ++ putc_unlocked ('\\', out.out); ++ putc_unlocked (ch, out.out); ++ break; ++ default: ++ if (ch < L_(' ') || ch > L_('~')) ++ /* For narrow characters, use octal sequences because they ++ are fixed width, unlike hexadecimal sequences. For ++ wide characters, use N2785 delimited escape ++ sequences. 
*/ ++ if (WIDE) ++ fprintf (out.out, "\\x{%x}", (unsigned int) ch); ++ else ++ fprintf (out.out, "\\%03o", (unsigned int) ch); ++ else ++ putc_unlocked (ch, out.out); ++ } ++ } ++ ++ xfclose_memstream (&out); ++ return out.buffer; ++} +diff --git a/support/support_quote_blob_wide.c b/support/support_quote_blob_wide.c +new file mode 100644 +index 0000000000..c451ed889c +--- /dev/null ++++ b/support/support_quote_blob_wide.c +@@ -0,0 +1,24 @@ ++/* Quote a wide string blob so that it can be used in C literals. ++ Copyright (C) 2018-2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define CHAR wchar_t ++#define L_(C) L ## C ++#define SUPPORT_QUOTE_BLOB support_quote_blob_wide ++#define WIDE 1 ++ ++#include "support_quote_blob_main.c" +diff --git a/support/support_test_compare_string.c b/support/support_test_compare_string.c +index cbeaf7b1ee..12bafe43d4 100644 +--- a/support/support_test_compare_string.c ++++ b/support/support_test_compare_string.c +@@ -16,76 +16,13 @@ + License along with the GNU C Library; if not, see + . 
*/ + +-#include +-#include +-#include +-#include +-#include +-#include +- +-static void +-report_length (const char *what, const char *str, size_t length) +-{ +- if (str == NULL) +- printf (" %s string: NULL\n", what); +- else +- printf (" %s string: %zu bytes\n", what, length); +-} +- +-static void +-report_string (const char *what, const unsigned char *blob, +- size_t length, const char *expr) +-{ +- if (length > 0) +- { +- printf (" %s (evaluated from %s):\n", what, expr); +- char *quoted = support_quote_blob (blob, length); +- printf (" \"%s\"\n", quoted); +- free (quoted); +- +- fputs (" ", stdout); +- for (size_t i = 0; i < length; ++i) +- printf (" %02X", blob[i]); +- putc ('\n', stdout); +- } +-} +- +-static size_t +-string_length_or_zero (const char *str) +-{ +- if (str == NULL) +- return 0; +- else +- return strlen (str); +-} +- +-void +-support_test_compare_string (const char *left, const char *right, +- const char *file, int line, +- const char *left_expr, const char *right_expr) +-{ +- /* Two null pointers are accepted. 
*/ +- if (left == NULL && right == NULL) +- return; +- +- size_t left_length = string_length_or_zero (left); +- size_t right_length = string_length_or_zero (right); +- +- if (left_length != right_length || left == NULL || right == NULL +- || memcmp (left, right, left_length) != 0) +- { +- support_record_failure (); +- printf ("%s:%d: error: string comparison failed\n", file, line); +- if (left_length == right_length && right != NULL && left != NULL) +- printf (" string length: %zu bytes\n", left_length); +- else +- { +- report_length ("left", left, left_length); +- report_length ("right", right, right_length); +- } +- report_string ("left", (const unsigned char *) left, +- left_length, left_expr); +- report_string ("right", (const unsigned char *) right, +- right_length, right_expr); +- } +-} ++#define CHAR char ++#define UCHAR unsigned char ++#define LPREFIX "" ++#define STRLEN strlen ++#define MEMCMP memcmp ++#define SUPPORT_QUOTE_BLOB support_quote_blob ++#define SUPPORT_TEST_COMPARE_STRING support_test_compare_string ++#define WIDE 0 ++ ++#include "support_test_compare_string_main.c" +diff --git a/support/support_test_compare_string_main.c b/support/support_test_compare_string_main.c +new file mode 100644 +index 0000000000..0edc0ca97d +--- /dev/null ++++ b/support/support_test_compare_string_main.c +@@ -0,0 +1,94 @@ ++/* Check two strings for equality. ++ Copyright (C) 2018-2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static void ++report_length (const char *what, const CHAR *str, size_t length) ++{ ++ if (str == NULL) ++ printf (" %s string: NULL\n", what); ++ else ++ printf (" %s string: %zu %s\n", what, length, ++ WIDE ? "wide characters" : "bytes"); ++} ++ ++static void ++report_string (const char *what, const UCHAR *blob, ++ size_t length, const char *expr) ++{ ++ if (length > 0) ++ { ++ printf (" %s (evaluated from %s):\n", what, expr); ++ char *quoted = SUPPORT_QUOTE_BLOB (blob, length); ++ printf (" %s\"%s\"\n", LPREFIX, quoted); ++ free (quoted); ++ ++ fputs (" ", stdout); ++ for (size_t i = 0; i < length; ++i) ++ printf (" %02X", (unsigned int) blob[i]); ++ putc ('\n', stdout); ++ } ++} ++ ++static size_t ++string_length_or_zero (const CHAR *str) ++{ ++ if (str == NULL) ++ return 0; ++ else ++ return STRLEN (str); ++} ++ ++void ++SUPPORT_TEST_COMPARE_STRING (const CHAR *left, const CHAR *right, ++ const char *file, int line, ++ const char *left_expr, const char *right_expr) ++{ ++ /* Two null pointers are accepted. */ ++ if (left == NULL && right == NULL) ++ return; ++ ++ size_t left_length = string_length_or_zero (left); ++ size_t right_length = string_length_or_zero (right); ++ ++ if (left_length != right_length || left == NULL || right == NULL ++ || MEMCMP (left, right, left_length) != 0) ++ { ++ support_record_failure (); ++ printf ("%s:%d: error: string comparison failed\n", file, line); ++ if (left_length == right_length && right != NULL && left != NULL) ++ printf (" string length: %zu %s\n", left_length, ++ WIDE ? 
"wide characters" : "bytes"); ++ else ++ { ++ report_length ("left", left, left_length); ++ report_length ("right", right, right_length); ++ } ++ report_string ("left", (const UCHAR *) left, ++ left_length, left_expr); ++ report_string ("right", (const UCHAR *) right, ++ right_length, right_expr); ++ } ++} +diff --git a/support/support_test_compare_string_wide.c b/support/support_test_compare_string_wide.c +new file mode 100644 +index 0000000000..88b560b142 +--- /dev/null ++++ b/support/support_test_compare_string_wide.c +@@ -0,0 +1,28 @@ ++/* Check two wide strings for equality. ++ Copyright (C) 2018-2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define CHAR wchar_t ++#define UCHAR wchar_t ++#define LPREFIX "L" ++#define STRLEN wcslen ++#define MEMCMP wmemcmp ++#define SUPPORT_QUOTE_BLOB support_quote_blob_wide ++#define SUPPORT_TEST_COMPARE_STRING support_test_compare_string_wide ++#define WIDE 1 ++ ++#include "support_test_compare_string_main.c" +diff --git a/support/tst-support_quote_blob_wide.c b/support/tst-support_quote_blob_wide.c +new file mode 100644 +index 0000000000..ea71a1f7f8 +--- /dev/null ++++ b/support/tst-support_quote_blob_wide.c +@@ -0,0 +1,66 @@ ++/* Test the support_quote_blob_wide function. ++ Copyright (C) 2018-2021 Free Software Foundation, Inc. 
++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ /* Check handling of the empty blob, both with and without trailing ++ NUL byte. */ ++ char *p = support_quote_blob_wide (L"", 0); ++ TEST_COMPARE (strlen (p), 0); ++ free (p); ++ p = support_quote_blob_wide (L"X", 0); ++ TEST_COMPARE (strlen (p), 0); ++ free (p); ++ ++ /* Check escaping of backslash-escaped characters, and lack of ++ escaping for other shell meta-characters. */ ++ p = support_quote_blob_wide (L"$()*?`@[]{}~\'\"X", 14); ++ TEST_COMPARE (strcmp (p, "$()*?`@[]{}~\\'\\\""), 0); ++ free (p); ++ ++ /* Check lack of escaping for letters and digits. */ ++#define LETTERS_AND_DIGTS \ ++ "abcdefghijklmnopqrstuvwxyz" \ ++ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ ++ "0123456789" ++#define CONCATX(X, Y) X ## Y ++#define CONCAT(X, Y) CONCATX (X, Y) ++#define WLETTERS_AND_DIGTS CONCAT (L, LETTERS_AND_DIGTS) ++ p = support_quote_blob_wide (WLETTERS_AND_DIGTS "@", 2 * 26 + 10); ++ TEST_COMPARE (strcmp (p, LETTERS_AND_DIGTS), 0); ++ free (p); ++ ++ /* Check escaping of control characters and other non-printable ++ characters. 
*/ ++ p = support_quote_blob_wide (L"\r\n\t\a\b\f\v\1\177\200\377" ++ "\x123\x76543210\xfedcba98\0@", 17); ++ TEST_COMPARE (strcmp (p, "\\r\\n\\t\\a\\b\\f\\v\\x{1}" ++ "\\x{7f}\\x{80}\\x{ff}\\x{123}\\x{76543210}" ++ "\\x{fedcba98}\\x{0}@\\x{0}"), 0); ++ free (p); ++ ++ return 0; ++} ++ ++#include +diff --git a/support/tst-test_compare_string_wide.c b/support/tst-test_compare_string_wide.c +new file mode 100644 +index 0000000000..548f7dcdc6 +--- /dev/null ++++ b/support/tst-test_compare_string_wide.c +@@ -0,0 +1,107 @@ ++/* Basic test for the TEST_COMPARE_STRING_WIDE macro. ++ Copyright (C) 2018-2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++static void ++subprocess (void *closure) ++{ ++ /* These tests should fail. They were chosen to cover differences ++ in length (with the same contents), single-bit mismatches, and ++ mismatching null pointers. */ ++ TEST_COMPARE_STRING_WIDE (L"", NULL); /* Line 29. */ ++ TEST_COMPARE_STRING_WIDE (L"X", L""); /* Line 30. */ ++ TEST_COMPARE_STRING_WIDE (NULL, L"X"); /* Line 31. */ ++ TEST_COMPARE_STRING_WIDE (L"abcd", L"abcD"); /* Line 32. */ ++ TEST_COMPARE_STRING_WIDE (L"abcd", NULL); /* Line 33. */ ++ TEST_COMPARE_STRING_WIDE (NULL, L"abcd"); /* Line 34. */ ++} ++ ++/* Same contents, different addresses. 
*/ ++wchar_t buffer_abc_1[] = L"abc"; ++wchar_t buffer_abc_2[] = L"abc"; ++ ++static int ++do_test (void) ++{ ++ /* This should succeed. Even if the pointers and array contents are ++ different, zero-length inputs are not different. */ ++ TEST_COMPARE_STRING_WIDE (NULL, NULL); ++ TEST_COMPARE_STRING_WIDE (L"", L""); ++ TEST_COMPARE_STRING_WIDE (buffer_abc_1, buffer_abc_2); ++ TEST_COMPARE_STRING_WIDE (buffer_abc_1, L"abc"); ++ ++ struct support_capture_subprocess proc = support_capture_subprocess ++ (&subprocess, NULL); ++ ++ /* Discard the reported error. */ ++ support_record_failure_reset (); ++ ++ puts ("info: *** subprocess output starts ***"); ++ fputs (proc.out.buffer, stdout); ++ puts ("info: *** subprocess output ends ***"); ++ ++ TEST_VERIFY ++ (strcmp (proc.out.buffer, ++"tst-test_compare_string_wide.c:29: error: string comparison failed\n" ++" left string: 0 wide characters\n" ++" right string: NULL\n" ++"tst-test_compare_string_wide.c:30: error: string comparison failed\n" ++" left string: 1 wide characters\n" ++" right string: 0 wide characters\n" ++" left (evaluated from L\"X\"):\n" ++" L\"X\"\n" ++" 58\n" ++"tst-test_compare_string_wide.c:31: error: string comparison failed\n" ++" left string: NULL\n" ++" right string: 1 wide characters\n" ++" right (evaluated from L\"X\"):\n" ++" L\"X\"\n" ++" 58\n" ++"tst-test_compare_string_wide.c:32: error: string comparison failed\n" ++" string length: 4 wide characters\n" ++" left (evaluated from L\"abcd\"):\n" ++" L\"abcd\"\n" ++" 61 62 63 64\n" ++" right (evaluated from L\"abcD\"):\n" ++" L\"abcD\"\n" ++" 61 62 63 44\n" ++"tst-test_compare_string_wide.c:33: error: string comparison failed\n" ++" left string: 4 wide characters\n" ++" right string: NULL\n" ++" left (evaluated from L\"abcd\"):\n" ++" L\"abcd\"\n" ++" 61 62 63 64\n" ++"tst-test_compare_string_wide.c:34: error: string comparison failed\n" ++" left string: NULL\n" ++" right string: 4 wide characters\n" ++" right (evaluated from L\"abcd\"):\n" ++" 
L\"abcd\"\n" ++" 61 62 63 64\n" ++ ) == 0); ++ ++ /* Check that there is no output on standard error. */ ++ support_capture_subprocess_check (&proc, "TEST_COMPARE_STRING_WIDE", ++ 0, sc_allow_stdout); ++ ++ return 0; ++} ++ ++#include <support/test-driver.c> +-- +2.28.0.windows.1 + diff --git a/backport-Add-codepoint_collation-support-for-LC_COLLATE.patch b/backport-Add-codepoint_collation-support-for-LC_COLLATE.patch new file mode 100644 index 0000000000000000000000000000000000000000..55fe40fe6c87edd4167ab81f1140aaf882d4eea9 --- /dev/null +++ b/backport-Add-codepoint_collation-support-for-LC_COLLATE.patch @@ -0,0 +1,994 @@ +From f5117c6504888fab5423282a4607c552b90fd3f9 Mon Sep 17 00:00:00 2001 +From: Carlos O'Donell +Date: Thu, 29 Jul 2021 22:45:39 -0400 +Subject: [PATCH] Add 'codepoint_collation' support for LC_COLLATE. + +Support a new directive 'codepoint_collation' in the LC_COLLATE +section of a locale source file. This new directive causes all +collation rules to be dropped and instead STRCMP (strcmp or +wcscmp) is used for collation of the input character set. This +is required to allow for a C.UTF-8 that contains zero collation +rules (minimal size) and sorts using code point sorting. + +To date the only implementation of a locale with zero collation +rules is the C/POSIX locale. The C/POSIX locale provides +identity tables for _NL_COLLATE_COLLSEQMB and +_NL_COLLATE_COLLSEQWC that map to ASCII even though it has zero +rules. This has led to existing fnmatch, regexec, and regcomp +implementations that require these tables. It is not correct +to use these tables when nrules == 0, but the conservative fix +is to provide these tables when nrules == 0. This assures that +existing static applications using a new C.UTF-8 locale with +'codepoint_collation' at least have functional range expressions +with ASCII e.g. [0-9] or [a-z]. Such static applications would +not have the fixes to fnmatch, regexec and regcomp that avoid +the use of the tables when nrules == 0.
Future fixes to fnmatch, +regexec, and regcomp would allow range expressions to use the +full set of code points for such ranges. + +Tested on x86_64 and i686 without regression. + +Reviewed-by: Florian Weimer + +Conflict:NA +Reference:https://sourceware.org/git/?p=glibc.git;a=commit;h=f5117c6504888fab5423282a4607c552b90fd3f9 +--- + locale/C-collate-seq.c | 100 +++++++++++ + locale/C-collate.c | 78 +------- + locale/programs/ld-collate.c | 36 +++- + locale/programs/locfile-kw.gperf | 1 + + locale/programs/locfile-kw.h | 299 ++++++++++++++++--------------- + locale/programs/locfile-token.h | 1 + + 6 files changed, 286 insertions(+), 229 deletions(-) + create mode 100644 locale/C-collate-seq.c + +diff --git a/locale/C-collate-seq.c b/locale/C-collate-seq.c +new file mode 100644 +index 0000000000..4fb82cb835 +--- /dev/null ++++ b/locale/C-collate-seq.c +@@ -0,0 +1,100 @@ ++/* Copyright (C) 1995-2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++static const char collseqmb[] = ++{ ++ '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', ++ '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', ++ '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', ++ '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', ++ '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', ++ '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', ++ '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', ++ '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', ++ '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', ++ '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', ++ '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', ++ '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', ++ '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', ++ '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', ++ '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', ++ '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f', ++ '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87', ++ '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f', ++ '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97', ++ '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f', ++ '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7', ++ '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf', ++ '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7', ++ '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf', ++ '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7', ++ '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf', ++ '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7', ++ '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf', ++ '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', 
'\xe7', ++ '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef', ++ '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7', ++ '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff' ++}; ++ ++/* This table must be 256 bytes in size. We index bytes into the ++ table to find the collation sequence. */ ++_Static_assert (sizeof (collseqmb) == 256); ++ ++static const uint32_t collseqwc[] = ++{ ++ 8, 1, 8, 0x0, 0xff, ++ /* 1st-level table */ ++ 6 * sizeof (uint32_t), ++ /* 2nd-level table */ ++ 7 * sizeof (uint32_t), ++ /* 3rd-level table */ ++ L'\x00', L'\x01', L'\x02', L'\x03', L'\x04', L'\x05', L'\x06', L'\x07', ++ L'\x08', L'\x09', L'\x0a', L'\x0b', L'\x0c', L'\x0d', L'\x0e', L'\x0f', ++ L'\x10', L'\x11', L'\x12', L'\x13', L'\x14', L'\x15', L'\x16', L'\x17', ++ L'\x18', L'\x19', L'\x1a', L'\x1b', L'\x1c', L'\x1d', L'\x1e', L'\x1f', ++ L'\x20', L'\x21', L'\x22', L'\x23', L'\x24', L'\x25', L'\x26', L'\x27', ++ L'\x28', L'\x29', L'\x2a', L'\x2b', L'\x2c', L'\x2d', L'\x2e', L'\x2f', ++ L'\x30', L'\x31', L'\x32', L'\x33', L'\x34', L'\x35', L'\x36', L'\x37', ++ L'\x38', L'\x39', L'\x3a', L'\x3b', L'\x3c', L'\x3d', L'\x3e', L'\x3f', ++ L'\x40', L'\x41', L'\x42', L'\x43', L'\x44', L'\x45', L'\x46', L'\x47', ++ L'\x48', L'\x49', L'\x4a', L'\x4b', L'\x4c', L'\x4d', L'\x4e', L'\x4f', ++ L'\x50', L'\x51', L'\x52', L'\x53', L'\x54', L'\x55', L'\x56', L'\x57', ++ L'\x58', L'\x59', L'\x5a', L'\x5b', L'\x5c', L'\x5d', L'\x5e', L'\x5f', ++ L'\x60', L'\x61', L'\x62', L'\x63', L'\x64', L'\x65', L'\x66', L'\x67', ++ L'\x68', L'\x69', L'\x6a', L'\x6b', L'\x6c', L'\x6d', L'\x6e', L'\x6f', ++ L'\x70', L'\x71', L'\x72', L'\x73', L'\x74', L'\x75', L'\x76', L'\x77', ++ L'\x78', L'\x79', L'\x7a', L'\x7b', L'\x7c', L'\x7d', L'\x7e', L'\x7f', ++ L'\x80', L'\x81', L'\x82', L'\x83', L'\x84', L'\x85', L'\x86', L'\x87', ++ L'\x88', L'\x89', L'\x8a', L'\x8b', L'\x8c', L'\x8d', L'\x8e', L'\x8f', ++ L'\x90', L'\x91', L'\x92', L'\x93', L'\x94', L'\x95', L'\x96', L'\x97', ++ 
L'\x98', L'\x99', L'\x9a', L'\x9b', L'\x9c', L'\x9d', L'\x9e', L'\x9f', ++ L'\xa0', L'\xa1', L'\xa2', L'\xa3', L'\xa4', L'\xa5', L'\xa6', L'\xa7', ++ L'\xa8', L'\xa9', L'\xaa', L'\xab', L'\xac', L'\xad', L'\xae', L'\xaf', ++ L'\xb0', L'\xb1', L'\xb2', L'\xb3', L'\xb4', L'\xb5', L'\xb6', L'\xb7', ++ L'\xb8', L'\xb9', L'\xba', L'\xbb', L'\xbc', L'\xbd', L'\xbe', L'\xbf', ++ L'\xc0', L'\xc1', L'\xc2', L'\xc3', L'\xc4', L'\xc5', L'\xc6', L'\xc7', ++ L'\xc8', L'\xc9', L'\xca', L'\xcb', L'\xcc', L'\xcd', L'\xce', L'\xcf', ++ L'\xd0', L'\xd1', L'\xd2', L'\xd3', L'\xd4', L'\xd5', L'\xd6', L'\xd7', ++ L'\xd8', L'\xd9', L'\xda', L'\xdb', L'\xdc', L'\xdd', L'\xde', L'\xdf', ++ L'\xe0', L'\xe1', L'\xe2', L'\xe3', L'\xe4', L'\xe5', L'\xe6', L'\xe7', ++ L'\xe8', L'\xe9', L'\xea', L'\xeb', L'\xec', L'\xed', L'\xee', L'\xef', ++ L'\xf0', L'\xf1', L'\xf2', L'\xf3', L'\xf4', L'\xf5', L'\xf6', L'\xf7', ++ L'\xf8', L'\xf9', L'\xfa', L'\xfb', L'\xfc', L'\xfd', L'\xfe', L'\xff' ++}; +diff --git a/locale/C-collate.c b/locale/C-collate.c +index 02b70570a4..bc93819f32 100644 +--- a/locale/C-collate.c ++++ b/locale/C-collate.c +@@ -19,83 +19,7 @@ + #include + #include "localeinfo.h" + +-static const char collseqmb[] = +-{ +- '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', +- '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', +- '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', +- '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', +- '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', +- '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', +- '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', +- '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', +- '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', +- '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', +- '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', +- '\x58', '\x59', '\x5a', '\x5b', '\x5c', 
'\x5d', '\x5e', '\x5f', +- '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', +- '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', +- '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', +- '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f', +- '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87', +- '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f', +- '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97', +- '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f', +- '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7', +- '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf', +- '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7', +- '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf', +- '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7', +- '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf', +- '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7', +- '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf', +- '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7', +- '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef', +- '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7', +- '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff' +-}; +- +-static const uint32_t collseqwc[] = +-{ +- 8, 1, 8, 0x0, 0xff, +- /* 1st-level table */ +- 6 * sizeof (uint32_t), +- /* 2nd-level table */ +- 7 * sizeof (uint32_t), +- /* 3rd-level table */ +- L'\x00', L'\x01', L'\x02', L'\x03', L'\x04', L'\x05', L'\x06', L'\x07', +- L'\x08', L'\x09', L'\x0a', L'\x0b', L'\x0c', L'\x0d', L'\x0e', L'\x0f', +- L'\x10', L'\x11', L'\x12', L'\x13', L'\x14', L'\x15', L'\x16', L'\x17', +- L'\x18', L'\x19', L'\x1a', L'\x1b', L'\x1c', L'\x1d', L'\x1e', L'\x1f', +- L'\x20', L'\x21', L'\x22', L'\x23', L'\x24', L'\x25', L'\x26', L'\x27', +- L'\x28', L'\x29', L'\x2a', L'\x2b', L'\x2c', L'\x2d', 
L'\x2e', L'\x2f', +- L'\x30', L'\x31', L'\x32', L'\x33', L'\x34', L'\x35', L'\x36', L'\x37', +- L'\x38', L'\x39', L'\x3a', L'\x3b', L'\x3c', L'\x3d', L'\x3e', L'\x3f', +- L'\x40', L'\x41', L'\x42', L'\x43', L'\x44', L'\x45', L'\x46', L'\x47', +- L'\x48', L'\x49', L'\x4a', L'\x4b', L'\x4c', L'\x4d', L'\x4e', L'\x4f', +- L'\x50', L'\x51', L'\x52', L'\x53', L'\x54', L'\x55', L'\x56', L'\x57', +- L'\x58', L'\x59', L'\x5a', L'\x5b', L'\x5c', L'\x5d', L'\x5e', L'\x5f', +- L'\x60', L'\x61', L'\x62', L'\x63', L'\x64', L'\x65', L'\x66', L'\x67', +- L'\x68', L'\x69', L'\x6a', L'\x6b', L'\x6c', L'\x6d', L'\x6e', L'\x6f', +- L'\x70', L'\x71', L'\x72', L'\x73', L'\x74', L'\x75', L'\x76', L'\x77', +- L'\x78', L'\x79', L'\x7a', L'\x7b', L'\x7c', L'\x7d', L'\x7e', L'\x7f', +- L'\x80', L'\x81', L'\x82', L'\x83', L'\x84', L'\x85', L'\x86', L'\x87', +- L'\x88', L'\x89', L'\x8a', L'\x8b', L'\x8c', L'\x8d', L'\x8e', L'\x8f', +- L'\x90', L'\x91', L'\x92', L'\x93', L'\x94', L'\x95', L'\x96', L'\x97', +- L'\x98', L'\x99', L'\x9a', L'\x9b', L'\x9c', L'\x9d', L'\x9e', L'\x9f', +- L'\xa0', L'\xa1', L'\xa2', L'\xa3', L'\xa4', L'\xa5', L'\xa6', L'\xa7', +- L'\xa8', L'\xa9', L'\xaa', L'\xab', L'\xac', L'\xad', L'\xae', L'\xaf', +- L'\xb0', L'\xb1', L'\xb2', L'\xb3', L'\xb4', L'\xb5', L'\xb6', L'\xb7', +- L'\xb8', L'\xb9', L'\xba', L'\xbb', L'\xbc', L'\xbd', L'\xbe', L'\xbf', +- L'\xc0', L'\xc1', L'\xc2', L'\xc3', L'\xc4', L'\xc5', L'\xc6', L'\xc7', +- L'\xc8', L'\xc9', L'\xca', L'\xcb', L'\xcc', L'\xcd', L'\xce', L'\xcf', +- L'\xd0', L'\xd1', L'\xd2', L'\xd3', L'\xd4', L'\xd5', L'\xd6', L'\xd7', +- L'\xd8', L'\xd9', L'\xda', L'\xdb', L'\xdc', L'\xdd', L'\xde', L'\xdf', +- L'\xe0', L'\xe1', L'\xe2', L'\xe3', L'\xe4', L'\xe5', L'\xe6', L'\xe7', +- L'\xe8', L'\xe9', L'\xea', L'\xeb', L'\xec', L'\xed', L'\xee', L'\xef', +- L'\xf0', L'\xf1', L'\xf2', L'\xf3', L'\xf4', L'\xf5', L'\xf6', L'\xf7', +- L'\xf8', L'\xf9', L'\xfa', L'\xfb', L'\xfc', L'\xfd', L'\xfe', L'\xff' +-}; ++#include 
"C-collate-seq.c" + + const struct __locale_data _nl_C_LC_COLLATE attribute_hidden = + { +diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c +index f4a8f34e46..06a5203334 100644 +--- a/locale/programs/ld-collate.c ++++ b/locale/programs/ld-collate.c +@@ -23,6 +23,7 @@ + #include + #include + #include ++#include + + #include "localedef.h" + #include "charmap.h" +@@ -194,6 +195,9 @@ struct name_list + /* The real definition of the struct for the LC_COLLATE locale. */ + struct locale_collate_t + { ++ /* Does the locale use code points to compare the encoding? */ ++ bool codepoint_collation; ++ + int col_weight_max; + int cur_weight_max; + +@@ -1509,6 +1513,7 @@ collate_startup (struct linereader *ldfile, struct localedef_t *locale, + obstack_init (&collate->mempool); + + collate->col_weight_max = -1; ++ collate->codepoint_collation = false; + } + else + /* Reuse the copy_locale's data structures. */ +@@ -1567,6 +1572,10 @@ collate_finish (struct localedef_t *locale, const struct charmap_t *charmap) + return; + } + ++ /* No data required. */ ++ if (collate->codepoint_collation) ++ return; ++ + /* If this assertion is hit change the type in `element_t'. */ + assert (nrules <= sizeof (runp->used_in_level) * 8); + +@@ -2091,6 +2100,10 @@ add_to_tablewc (uint32_t ch, struct element_t *runp) + } + } + ++/* Include the C locale identity tables for _NL_COLLATE_COLLSEQMB and ++ _NL_COLLATE_COLLSEQWC. */ ++#include "C-collate-seq.c" ++ + void + collate_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +@@ -2114,7 +2127,7 @@ collate_output (struct localedef_t *locale, const struct charmap_t *charmap, + add_locale_uint32 (&file, nrules); + + /* If we have no LC_COLLATE data emit only the number of rules as zero. 
*/ +- if (collate == NULL) ++ if (collate == NULL || collate->codepoint_collation) + { + size_t idx; + for (idx = 1; idx < nelems; idx++) +@@ -2122,6 +2135,17 @@ collate_output (struct localedef_t *locale, const struct charmap_t *charmap, + /* The words have to be handled specially. */ + if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB)) + add_locale_uint32 (&file, 0); ++ else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_CODESET) ++ && collate != NULL) ++ /* A valid LC_COLLATE must have a code set name. */ ++ add_locale_string (&file, charmap->code_set_name); ++ else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB) ++ && collate != NULL) ++ add_locale_raw_data (&file, collseqmb, sizeof (collseqmb)); ++ else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC) ++ && collate != NULL) ++ add_locale_uint32_array (&file, collseqwc, ++ array_length (collseqwc)); + else + add_locale_empty (&file); + } +@@ -2671,6 +2695,10 @@ collate_read (struct linereader *ldfile, struct localedef_t *result, + + switch (nowtok) + { ++ case tok_codepoint_collation: ++ collate->codepoint_collation = true; ++ break; ++ + case tok_copy: + /* Allow copying other locales. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); +@@ -3741,9 +3769,11 @@ error while adding equivalent collating symbol")); + /* Next we assume `LC_COLLATE'. */ + if (!ignore_content) + { +- if (state == 0 && copy_locale == NULL) ++ if (state == 0 ++ && copy_locale == NULL ++ && !collate->codepoint_collation) + /* We must either see a copy statement or have +- ordering values. */ ++ ordering values, or codepoint_collation. 
*/ + lr_error (ldfile, + _("%s: empty category description not allowed"), + "LC_COLLATE"); +diff --git a/locale/programs/locfile-kw.gperf b/locale/programs/locfile-kw.gperf +index 0d3b95d77b..5ca9b47085 100644 +--- a/locale/programs/locfile-kw.gperf ++++ b/locale/programs/locfile-kw.gperf +@@ -53,6 +53,7 @@ translit_end, tok_translit_end, 0 + translit_ignore, tok_translit_ignore, 0 + default_missing, tok_default_missing, 0 + LC_COLLATE, tok_lc_collate, 0 ++codepoint_collation, tok_codepoint_collation, 0 + coll_weight_max, tok_coll_weight_max, 0 + section-symbol, tok_section_symbol, 0 + collating-element, tok_collating_element, 0 +diff --git a/locale/programs/locfile-kw.h b/locale/programs/locfile-kw.h +index dc150bb8f8..c57d74f5f3 100644 +--- a/locale/programs/locfile-kw.h ++++ b/locale/programs/locfile-kw.h +@@ -53,7 +53,7 @@ + #line 24 "locfile-kw.gperf" + struct keyword_t ; + +-#define TOTAL_KEYWORDS 178 ++#define TOTAL_KEYWORDS 179 + #define MIN_WORD_LENGTH 3 + #define MAX_WORD_LENGTH 22 + #define MIN_HASH_VALUE 3 +@@ -133,92 +133,92 @@ locfile_hash (register const char *str, register size_t len) + #line 31 "locfile-kw.gperf" + {"END", tok_end, 0}, + {""}, {""}, +-#line 70 "locfile-kw.gperf" ++#line 71 "locfile-kw.gperf" + {"IGNORE", tok_ignore, 0}, +-#line 129 "locfile-kw.gperf" ++#line 130 "locfile-kw.gperf" + {"LC_TIME", tok_lc_time, 0}, + #line 30 "locfile-kw.gperf" + {"LC_CTYPE", tok_lc_ctype, 0}, + {""}, +-#line 168 "locfile-kw.gperf" ++#line 169 "locfile-kw.gperf" + {"LC_ADDRESS", tok_lc_address, 0}, +-#line 153 "locfile-kw.gperf" ++#line 154 "locfile-kw.gperf" + {"LC_MESSAGES", tok_lc_messages, 0}, +-#line 161 "locfile-kw.gperf" ++#line 162 "locfile-kw.gperf" + {"LC_NAME", tok_lc_name, 0}, +-#line 158 "locfile-kw.gperf" ++#line 159 "locfile-kw.gperf" + {"LC_PAPER", tok_lc_paper, 0}, +-#line 186 "locfile-kw.gperf" ++#line 187 "locfile-kw.gperf" + {"LC_MEASUREMENT", tok_lc_measurement, 0}, + #line 56 "locfile-kw.gperf" + {"LC_COLLATE", tok_lc_collate, 0}, 
+ {""}, +-#line 188 "locfile-kw.gperf" ++#line 189 "locfile-kw.gperf" + {"LC_IDENTIFICATION", tok_lc_identification, 0}, +-#line 201 "locfile-kw.gperf" ++#line 202 "locfile-kw.gperf" + {"revision", tok_revision, 0}, +-#line 69 "locfile-kw.gperf" ++#line 70 "locfile-kw.gperf" + {"UNDEFINED", tok_undefined, 0}, +-#line 125 "locfile-kw.gperf" ++#line 126 "locfile-kw.gperf" + {"LC_NUMERIC", tok_lc_numeric, 0}, +-#line 82 "locfile-kw.gperf" ++#line 83 "locfile-kw.gperf" + {"LC_MONETARY", tok_lc_monetary, 0}, +-#line 181 "locfile-kw.gperf" ++#line 182 "locfile-kw.gperf" + {"LC_TELEPHONE", tok_lc_telephone, 0}, + {""}, {""}, {""}, +-#line 75 "locfile-kw.gperf" ++#line 76 "locfile-kw.gperf" + {"define", tok_define, 0}, +-#line 154 "locfile-kw.gperf" ++#line 155 "locfile-kw.gperf" + {"yesexpr", tok_yesexpr, 0}, +-#line 141 "locfile-kw.gperf" ++#line 142 "locfile-kw.gperf" + {"era_year", tok_era_year, 0}, + {""}, + #line 54 "locfile-kw.gperf" + {"translit_ignore", tok_translit_ignore, 0}, +-#line 156 "locfile-kw.gperf" ++#line 157 "locfile-kw.gperf" + {"yesstr", tok_yesstr, 0}, + {""}, +-#line 89 "locfile-kw.gperf" ++#line 90 "locfile-kw.gperf" + {"negative_sign", tok_negative_sign, 0}, + {""}, +-#line 137 "locfile-kw.gperf" ++#line 138 "locfile-kw.gperf" + {"t_fmt", tok_t_fmt, 0}, +-#line 159 "locfile-kw.gperf" ++#line 160 "locfile-kw.gperf" + {"height", tok_height, 0}, + {""}, {""}, + #line 52 "locfile-kw.gperf" + {"translit_start", tok_translit_start, 0}, +-#line 136 "locfile-kw.gperf" ++#line 137 "locfile-kw.gperf" + {"d_fmt", tok_d_fmt, 0}, + {""}, + #line 53 "locfile-kw.gperf" + {"translit_end", tok_translit_end, 0}, +-#line 94 "locfile-kw.gperf" ++#line 95 "locfile-kw.gperf" + {"n_cs_precedes", tok_n_cs_precedes, 0}, +-#line 144 "locfile-kw.gperf" ++#line 145 "locfile-kw.gperf" + {"era_t_fmt", tok_era_t_fmt, 0}, + #line 39 "locfile-kw.gperf" + {"space", tok_space, 0}, +-#line 72 "locfile-kw.gperf" +- {"reorder-end", tok_reorder_end, 0}, + #line 73 "locfile-kw.gperf" 
++ {"reorder-end", tok_reorder_end, 0}, ++#line 74 "locfile-kw.gperf" + {"reorder-sections-after", tok_reorder_sections_after, 0}, + {""}, +-#line 142 "locfile-kw.gperf" ++#line 143 "locfile-kw.gperf" + {"era_d_fmt", tok_era_d_fmt, 0}, +-#line 189 "locfile-kw.gperf" ++#line 190 "locfile-kw.gperf" + {"title", tok_title, 0}, + {""}, {""}, +-#line 149 "locfile-kw.gperf" ++#line 150 "locfile-kw.gperf" + {"timezone", tok_timezone, 0}, + {""}, +-#line 74 "locfile-kw.gperf" ++#line 75 "locfile-kw.gperf" + {"reorder-sections-end", tok_reorder_sections_end, 0}, + {""}, {""}, {""}, +-#line 95 "locfile-kw.gperf" ++#line 96 "locfile-kw.gperf" + {"n_sep_by_space", tok_n_sep_by_space, 0}, + {""}, {""}, +-#line 100 "locfile-kw.gperf" ++#line 101 "locfile-kw.gperf" + {"int_n_cs_precedes", tok_int_n_cs_precedes, 0}, + {""}, {""}, {""}, + #line 26 "locfile-kw.gperf" +@@ -232,147 +232,147 @@ locfile_hash (register const char *str, register size_t len) + {"print", tok_print, 0}, + #line 44 "locfile-kw.gperf" + {"xdigit", tok_xdigit, 0}, +-#line 110 "locfile-kw.gperf" ++#line 111 "locfile-kw.gperf" + {"duo_n_cs_precedes", tok_duo_n_cs_precedes, 0}, +-#line 127 "locfile-kw.gperf" ++#line 128 "locfile-kw.gperf" + {"thousands_sep", tok_thousands_sep, 0}, +-#line 197 "locfile-kw.gperf" ++#line 198 "locfile-kw.gperf" + {"territory", tok_territory, 0}, + #line 36 "locfile-kw.gperf" + {"digit", tok_digit, 0}, + {""}, {""}, +-#line 92 "locfile-kw.gperf" ++#line 93 "locfile-kw.gperf" + {"p_cs_precedes", tok_p_cs_precedes, 0}, + {""}, {""}, +-#line 62 "locfile-kw.gperf" ++#line 63 "locfile-kw.gperf" + {"script", tok_script, 0}, + #line 29 "locfile-kw.gperf" + {"include", tok_include, 0}, + {""}, +-#line 78 "locfile-kw.gperf" ++#line 79 "locfile-kw.gperf" + {"else", tok_else, 0}, +-#line 184 "locfile-kw.gperf" ++#line 185 "locfile-kw.gperf" + {"int_select", tok_int_select, 0}, + {""}, {""}, {""}, +-#line 132 "locfile-kw.gperf" ++#line 133 "locfile-kw.gperf" + {"week", tok_week, 0}, + #line 33 
"locfile-kw.gperf" + {"upper", tok_upper, 0}, + {""}, {""}, +-#line 194 "locfile-kw.gperf" ++#line 195 "locfile-kw.gperf" + {"tel", tok_tel, 0}, +-#line 93 "locfile-kw.gperf" ++#line 94 "locfile-kw.gperf" + {"p_sep_by_space", tok_p_sep_by_space, 0}, +-#line 160 "locfile-kw.gperf" ++#line 161 "locfile-kw.gperf" + {"width", tok_width, 0}, + {""}, +-#line 98 "locfile-kw.gperf" ++#line 99 "locfile-kw.gperf" + {"int_p_cs_precedes", tok_int_p_cs_precedes, 0}, + {""}, {""}, + #line 41 "locfile-kw.gperf" + {"punct", tok_punct, 0}, + {""}, {""}, +-#line 101 "locfile-kw.gperf" ++#line 102 "locfile-kw.gperf" + {"int_n_sep_by_space", tok_int_n_sep_by_space, 0}, + {""}, {""}, {""}, +-#line 108 "locfile-kw.gperf" ++#line 109 "locfile-kw.gperf" + {"duo_p_cs_precedes", tok_duo_p_cs_precedes, 0}, + #line 48 "locfile-kw.gperf" + {"charconv", tok_charconv, 0}, + {""}, + #line 47 "locfile-kw.gperf" + {"class", tok_class, 0}, +-#line 114 "locfile-kw.gperf" +- {"duo_int_n_cs_precedes", tok_duo_int_n_cs_precedes, 0}, + #line 115 "locfile-kw.gperf" ++ {"duo_int_n_cs_precedes", tok_duo_int_n_cs_precedes, 0}, ++#line 116 "locfile-kw.gperf" + {"duo_int_n_sep_by_space", tok_duo_int_n_sep_by_space, 0}, +-#line 111 "locfile-kw.gperf" ++#line 112 "locfile-kw.gperf" + {"duo_n_sep_by_space", tok_duo_n_sep_by_space, 0}, +-#line 119 "locfile-kw.gperf" ++#line 120 "locfile-kw.gperf" + {"duo_int_n_sign_posn", tok_duo_int_n_sign_posn, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +-#line 58 "locfile-kw.gperf" ++#line 59 "locfile-kw.gperf" + {"section-symbol", tok_section_symbol, 0}, +-#line 185 "locfile-kw.gperf" ++#line 186 "locfile-kw.gperf" + {"int_prefix", tok_int_prefix, 0}, + {""}, {""}, {""}, {""}, + #line 42 "locfile-kw.gperf" + {"graph", tok_graph, 0}, + {""}, {""}, +-#line 99 "locfile-kw.gperf" ++#line 100 "locfile-kw.gperf" + {"int_p_sep_by_space", tok_int_p_sep_by_space, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +-#line 112 
"locfile-kw.gperf" +- {"duo_int_p_cs_precedes", tok_duo_int_p_cs_precedes, 0}, + #line 113 "locfile-kw.gperf" ++ {"duo_int_p_cs_precedes", tok_duo_int_p_cs_precedes, 0}, ++#line 114 "locfile-kw.gperf" + {"duo_int_p_sep_by_space", tok_duo_int_p_sep_by_space, 0}, +-#line 109 "locfile-kw.gperf" ++#line 110 "locfile-kw.gperf" + {"duo_p_sep_by_space", tok_duo_p_sep_by_space, 0}, +-#line 118 "locfile-kw.gperf" ++#line 119 "locfile-kw.gperf" + {"duo_int_p_sign_posn", tok_duo_int_p_sign_posn, 0}, +-#line 157 "locfile-kw.gperf" ++#line 158 "locfile-kw.gperf" + {"nostr", tok_nostr, 0}, + {""}, {""}, +-#line 140 "locfile-kw.gperf" ++#line 141 "locfile-kw.gperf" + {"era", tok_era, 0}, + {""}, +-#line 84 "locfile-kw.gperf" ++#line 85 "locfile-kw.gperf" + {"currency_symbol", tok_currency_symbol, 0}, + {""}, +-#line 167 "locfile-kw.gperf" ++#line 168 "locfile-kw.gperf" + {"name_ms", tok_name_ms, 0}, +-#line 165 "locfile-kw.gperf" +- {"name_mrs", tok_name_mrs, 0}, + #line 166 "locfile-kw.gperf" ++ {"name_mrs", tok_name_mrs, 0}, ++#line 167 "locfile-kw.gperf" + {"name_miss", tok_name_miss, 0}, +-#line 83 "locfile-kw.gperf" ++#line 84 "locfile-kw.gperf" + {"int_curr_symbol", tok_int_curr_symbol, 0}, +-#line 190 "locfile-kw.gperf" ++#line 191 "locfile-kw.gperf" + {"source", tok_source, 0}, +-#line 164 "locfile-kw.gperf" ++#line 165 "locfile-kw.gperf" + {"name_mr", tok_name_mr, 0}, +-#line 163 "locfile-kw.gperf" ++#line 164 "locfile-kw.gperf" + {"name_gen", tok_name_gen, 0}, +-#line 202 "locfile-kw.gperf" ++#line 203 "locfile-kw.gperf" + {"date", tok_date, 0}, + {""}, {""}, +-#line 191 "locfile-kw.gperf" ++#line 192 "locfile-kw.gperf" + {"address", tok_address, 0}, +-#line 162 "locfile-kw.gperf" ++#line 163 "locfile-kw.gperf" + {"name_fmt", tok_name_fmt, 0}, + #line 32 "locfile-kw.gperf" + {"copy", tok_copy, 0}, +-#line 103 "locfile-kw.gperf" ++#line 104 "locfile-kw.gperf" + {"int_n_sign_posn", tok_int_n_sign_posn, 0}, + {""}, {""}, +-#line 131 "locfile-kw.gperf" ++#line 132 
"locfile-kw.gperf" + {"day", tok_day, 0}, +-#line 105 "locfile-kw.gperf" ++#line 106 "locfile-kw.gperf" + {"duo_currency_symbol", tok_duo_currency_symbol, 0}, + {""}, {""}, {""}, +-#line 150 "locfile-kw.gperf" ++#line 151 "locfile-kw.gperf" + {"date_fmt", tok_date_fmt, 0}, +-#line 64 "locfile-kw.gperf" ++#line 65 "locfile-kw.gperf" + {"order_end", tok_order_end, 0}, +-#line 117 "locfile-kw.gperf" ++#line 118 "locfile-kw.gperf" + {"duo_n_sign_posn", tok_duo_n_sign_posn, 0}, + {""}, +-#line 170 "locfile-kw.gperf" ++#line 171 "locfile-kw.gperf" + {"country_name", tok_country_name, 0}, +-#line 71 "locfile-kw.gperf" ++#line 72 "locfile-kw.gperf" + {"reorder-after", tok_reorder_after, 0}, + {""}, {""}, +-#line 155 "locfile-kw.gperf" ++#line 156 "locfile-kw.gperf" + {"noexpr", tok_noexpr, 0}, + #line 50 "locfile-kw.gperf" + {"tolower", tok_tolower, 0}, +-#line 198 "locfile-kw.gperf" ++#line 199 "locfile-kw.gperf" + {"audience", tok_audience, 0}, + {""}, {""}, {""}, + #line 49 "locfile-kw.gperf" + {"toupper", tok_toupper, 0}, +-#line 68 "locfile-kw.gperf" ++#line 69 "locfile-kw.gperf" + {"position", tok_position, 0}, + {""}, + #line 40 "locfile-kw.gperf" +@@ -380,196 +380,197 @@ locfile_hash (register const char *str, register size_t len) + {""}, + #line 27 "locfile-kw.gperf" + {"comment_char", tok_comment_char, 0}, +-#line 88 "locfile-kw.gperf" ++#line 89 "locfile-kw.gperf" + {"positive_sign", tok_positive_sign, 0}, + {""}, {""}, {""}, {""}, +-#line 61 "locfile-kw.gperf" ++#line 62 "locfile-kw.gperf" + {"symbol-equivalence", tok_symbol_equivalence, 0}, + {""}, +-#line 102 "locfile-kw.gperf" ++#line 103 "locfile-kw.gperf" + {"int_p_sign_posn", tok_int_p_sign_posn, 0}, +-#line 175 "locfile-kw.gperf" ++#line 176 "locfile-kw.gperf" + {"country_car", tok_country_car, 0}, + {""}, {""}, +-#line 104 "locfile-kw.gperf" ++#line 105 "locfile-kw.gperf" + {"duo_int_curr_symbol", tok_duo_int_curr_symbol, 0}, + {""}, {""}, +-#line 135 "locfile-kw.gperf" ++#line 136 "locfile-kw.gperf" + 
{"d_t_fmt", tok_d_t_fmt, 0}, + {""}, {""}, +-#line 116 "locfile-kw.gperf" ++#line 117 "locfile-kw.gperf" + {"duo_p_sign_posn", tok_duo_p_sign_posn, 0}, +-#line 187 "locfile-kw.gperf" ++#line 188 "locfile-kw.gperf" + {"measurement", tok_measurement, 0}, +-#line 176 "locfile-kw.gperf" ++#line 177 "locfile-kw.gperf" + {"country_isbn", tok_country_isbn, 0}, + #line 37 "locfile-kw.gperf" + {"outdigit", tok_outdigit, 0}, + {""}, {""}, +-#line 143 "locfile-kw.gperf" ++#line 144 "locfile-kw.gperf" + {"era_d_t_fmt", tok_era_d_t_fmt, 0}, + {""}, {""}, {""}, + #line 34 "locfile-kw.gperf" + {"lower", tok_lower, 0}, +-#line 183 "locfile-kw.gperf" ++#line 184 "locfile-kw.gperf" + {"tel_dom_fmt", tok_tel_dom_fmt, 0}, +-#line 171 "locfile-kw.gperf" ++#line 172 "locfile-kw.gperf" + {"country_post", tok_country_post, 0}, +-#line 148 "locfile-kw.gperf" ++#line 149 "locfile-kw.gperf" + {"cal_direction", tok_cal_direction, 0}, +- {""}, +-#line 139 "locfile-kw.gperf" ++#line 57 "locfile-kw.gperf" ++ {"codepoint_collation", tok_codepoint_collation, 0}, ++#line 140 "locfile-kw.gperf" + {"t_fmt_ampm", tok_t_fmt_ampm, 0}, +-#line 91 "locfile-kw.gperf" ++#line 92 "locfile-kw.gperf" + {"frac_digits", tok_frac_digits, 0}, + {""}, {""}, +-#line 177 "locfile-kw.gperf" ++#line 178 "locfile-kw.gperf" + {"lang_name", tok_lang_name, 0}, +-#line 90 "locfile-kw.gperf" ++#line 91 "locfile-kw.gperf" + {"int_frac_digits", tok_int_frac_digits, 0}, + {""}, +-#line 121 "locfile-kw.gperf" ++#line 122 "locfile-kw.gperf" + {"uno_valid_to", tok_uno_valid_to, 0}, +-#line 126 "locfile-kw.gperf" ++#line 127 "locfile-kw.gperf" + {"decimal_point", tok_decimal_point, 0}, + {""}, +-#line 133 "locfile-kw.gperf" ++#line 134 "locfile-kw.gperf" + {"abmon", tok_abmon, 0}, + {""}, {""}, {""}, {""}, +-#line 107 "locfile-kw.gperf" ++#line 108 "locfile-kw.gperf" + {"duo_frac_digits", tok_duo_frac_digits, 0}, +-#line 182 "locfile-kw.gperf" ++#line 183 "locfile-kw.gperf" + {"tel_int_fmt", tok_tel_int_fmt, 0}, +-#line 123 
"locfile-kw.gperf" ++#line 124 "locfile-kw.gperf" + {"duo_valid_to", tok_duo_valid_to, 0}, +-#line 146 "locfile-kw.gperf" ++#line 147 "locfile-kw.gperf" + {"first_weekday", tok_first_weekday, 0}, + {""}, +-#line 130 "locfile-kw.gperf" ++#line 131 "locfile-kw.gperf" + {"abday", tok_abday, 0}, + {""}, +-#line 200 "locfile-kw.gperf" ++#line 201 "locfile-kw.gperf" + {"abbreviation", tok_abbreviation, 0}, +-#line 147 "locfile-kw.gperf" ++#line 148 "locfile-kw.gperf" + {"first_workday", tok_first_workday, 0}, + {""}, {""}, +-#line 97 "locfile-kw.gperf" ++#line 98 "locfile-kw.gperf" + {"n_sign_posn", tok_n_sign_posn, 0}, + {""}, {""}, {""}, +-#line 145 "locfile-kw.gperf" ++#line 146 "locfile-kw.gperf" + {"alt_digits", tok_alt_digits, 0}, + {""}, {""}, +-#line 128 "locfile-kw.gperf" ++#line 129 "locfile-kw.gperf" + {"grouping", tok_grouping, 0}, + {""}, + #line 45 "locfile-kw.gperf" + {"blank", tok_blank, 0}, + {""}, {""}, +-#line 196 "locfile-kw.gperf" ++#line 197 "locfile-kw.gperf" + {"language", tok_language, 0}, +-#line 120 "locfile-kw.gperf" ++#line 121 "locfile-kw.gperf" + {"uno_valid_from", tok_uno_valid_from, 0}, + {""}, +-#line 199 "locfile-kw.gperf" ++#line 200 "locfile-kw.gperf" + {"application", tok_application, 0}, + {""}, +-#line 80 "locfile-kw.gperf" ++#line 81 "locfile-kw.gperf" + {"elifndef", tok_elifndef, 0}, + {""}, {""}, {""}, {""}, {""}, +-#line 122 "locfile-kw.gperf" ++#line 123 "locfile-kw.gperf" + {"duo_valid_from", tok_duo_valid_from, 0}, +-#line 57 "locfile-kw.gperf" ++#line 58 "locfile-kw.gperf" + {"coll_weight_max", tok_coll_weight_max, 0}, + {""}, +-#line 79 "locfile-kw.gperf" ++#line 80 "locfile-kw.gperf" + {"elifdef", tok_elifdef, 0}, +-#line 67 "locfile-kw.gperf" ++#line 68 "locfile-kw.gperf" + {"backward", tok_backward, 0}, +-#line 106 "locfile-kw.gperf" ++#line 107 "locfile-kw.gperf" + {"duo_int_frac_digits", tok_duo_int_frac_digits, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, +-#line 96 "locfile-kw.gperf" ++#line 97 "locfile-kw.gperf" + 
{"p_sign_posn", tok_p_sign_posn, 0}, + {""}, +-#line 203 "locfile-kw.gperf" ++#line 204 "locfile-kw.gperf" + {"category", tok_category, 0}, + {""}, {""}, {""}, {""}, +-#line 134 "locfile-kw.gperf" ++#line 135 "locfile-kw.gperf" + {"mon", tok_mon, 0}, + {""}, +-#line 124 "locfile-kw.gperf" ++#line 125 "locfile-kw.gperf" + {"conversion_rate", tok_conversion_rate, 0}, + {""}, {""}, {""}, {""}, {""}, +-#line 63 "locfile-kw.gperf" ++#line 64 "locfile-kw.gperf" + {"order_start", tok_order_start, 0}, + {""}, {""}, {""}, {""}, {""}, +-#line 178 "locfile-kw.gperf" ++#line 179 "locfile-kw.gperf" + {"lang_ab", tok_lang_ab, 0}, +-#line 180 "locfile-kw.gperf" ++#line 181 "locfile-kw.gperf" + {"lang_lib", tok_lang_lib, 0}, + {""}, {""}, {""}, +-#line 192 "locfile-kw.gperf" ++#line 193 "locfile-kw.gperf" + {"contact", tok_contact, 0}, + {""}, {""}, {""}, +-#line 173 "locfile-kw.gperf" ++#line 174 "locfile-kw.gperf" + {"country_ab3", tok_country_ab3, 0}, + {""}, {""}, {""}, +-#line 193 "locfile-kw.gperf" ++#line 194 "locfile-kw.gperf" + {"email", tok_email, 0}, +-#line 172 "locfile-kw.gperf" ++#line 173 "locfile-kw.gperf" + {"country_ab2", tok_country_ab2, 0}, + {""}, {""}, {""}, + #line 55 "locfile-kw.gperf" + {"default_missing", tok_default_missing, 0}, + {""}, {""}, +-#line 195 "locfile-kw.gperf" ++#line 196 "locfile-kw.gperf" + {"fax", tok_fax, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +-#line 174 "locfile-kw.gperf" ++#line 175 "locfile-kw.gperf" + {"country_num", tok_country_num, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, + #line 51 "locfile-kw.gperf" + {"map", tok_map, 0}, +-#line 65 "locfile-kw.gperf" ++#line 66 "locfile-kw.gperf" + {"from", tok_from, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +-#line 86 "locfile-kw.gperf" ++#line 87 "locfile-kw.gperf" + {"mon_thousands_sep", tok_mon_thousands_sep, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +-#line 81 "locfile-kw.gperf" ++#line 82 "locfile-kw.gperf" + {"endif", tok_endif, 0}, + 
{""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +-#line 151 "locfile-kw.gperf" ++#line 152 "locfile-kw.gperf" + {"alt_mon", tok_alt_mon, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +-#line 76 "locfile-kw.gperf" ++#line 77 "locfile-kw.gperf" + {"undef", tok_undef, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +-#line 59 "locfile-kw.gperf" ++#line 60 "locfile-kw.gperf" + {"collating-element", tok_collating_element, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +-#line 152 "locfile-kw.gperf" ++#line 153 "locfile-kw.gperf" + {"ab_alt_mon", tok_ab_alt_mon, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +-#line 66 "locfile-kw.gperf" ++#line 67 "locfile-kw.gperf" + {"forward", tok_forward, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +-#line 85 "locfile-kw.gperf" ++#line 86 "locfile-kw.gperf" + {"mon_decimal_point", tok_mon_decimal_point, 0}, + {""}, {""}, +-#line 169 "locfile-kw.gperf" ++#line 170 "locfile-kw.gperf" + {"postal_fmt", tok_postal_fmt, 0}, + {""}, {""}, {""}, {""}, {""}, +-#line 60 "locfile-kw.gperf" ++#line 61 "locfile-kw.gperf" + {"collating-symbol", tok_collating_symbol, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +@@ -582,15 +583,15 @@ locfile_hash (register const char *str, register size_t len) + #line 38 "locfile-kw.gperf" + {"alnum", tok_alnum, 0}, + {""}, +-#line 87 "locfile-kw.gperf" ++#line 88 "locfile-kw.gperf" + {"mon_grouping", tok_mon_grouping, 0}, + {""}, +-#line 179 "locfile-kw.gperf" ++#line 180 "locfile-kw.gperf" 
+ {"lang_term", tok_lang_term, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +-#line 77 "locfile-kw.gperf" ++#line 78 "locfile-kw.gperf" + {"ifdef", tok_ifdef, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +@@ -598,7 +599,7 @@ locfile_hash (register const char *str, register size_t len) + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +-#line 138 "locfile-kw.gperf" ++#line 139 "locfile-kw.gperf" + {"am_pm", tok_am_pm, 0} + }; + +diff --git a/locale/programs/locfile-token.h b/locale/programs/locfile-token.h +index abeff8a09e..0bf771c752 100644 +--- a/locale/programs/locfile-token.h ++++ b/locale/programs/locfile-token.h +@@ -90,6 +90,7 @@ enum token_t + tok_translit_ignore, + tok_default_missing, + tok_lc_collate, ++ tok_codepoint_collation, + tok_coll_weight_max, + tok_section_symbol, + tok_collating_element, +-- +2.28.0.windows.1 + diff --git a/backport-Add-generic-C.UTF-8-locale-Bug-17318.patch b/backport-Add-generic-C.UTF-8-locale-Bug-17318.patch new file mode 100644 index 0000000000000000000000000000000000000000..2623f4ff9ff115db1ca04a59661b78b380b86851 --- /dev/null +++ b/backport-Add-generic-C.UTF-8-locale-Bug-17318.patch @@ -0,0 +1,1462 @@ +From 466f2be6c08070e9113ae2fdc7acd5d8828cba50 Mon Sep 17 00:00:00 2001 +From: Carlos O'Donell +Date: Wed, 1 Sep 2021 15:19:19 -0400 +Subject: [PATCH] Add generic C.UTF-8 locale (Bug 17318) + +We add a new C.UTF-8 locale. This locale is not builtin to glibc, but +is provided as a distinct locale. The locale provides full support for +UTF-8 and this includes full code point sorting via STRCMP-based +collation (strcmp or wcscmp). 
+ +The collation uses a new keyword 'codepoint_collation' which drops all +collation rules and generates an empty zero rules collation to enable +STRCMP usage in collation. This ensures that we get full code point +sorting for C.UTF-8 with a minimal 1406 bytes of overhead (LC_COLLATE +structure information and ASCII collating tables). + +The new locale is added to SUPPORTED. Minimal test data for specific +code points (minus those not supported by collate-test) is provided in +C.UTF-8.in, and this verifies code point sorting is working reasonably +across the range. The locale was tested manually with the full set of +code points without failure. + +The locale is harmonized with locales already shipping in various +downstream distributions. A new tst-iconv9 test is added which verifies +the C.UTF-8 locale is generally usable. + +Testing for fnmatch, regexec, and regcomp is provided by extending +bug-regex1, bug-regex19, bug-regex4, bug-regex6, transbug, tst-fnmatch, +tst-regcomp-truncated, and tst-regex to use C.UTF-8. + +Tested on x86_64 or i686 without regression. 
+ +Reviewed-by: Florian Weimer + +Conflict:adapt posix/bug-regex1.c context delete NEWS +Reference:https://sourceware.org/git/?p=glibc.git;a=commit;h=466f2be6c08070e9113ae2fdc7acd5d8828cba50 +--- + iconv/Makefile | 22 +- + iconv/tst-iconv9.c | 87 ++++++ + localedata/C.UTF-8.in | 157 ++++++++++ + localedata/Makefile | 2 + + localedata/SUPPORTED | 1 + + localedata/locales/C | 194 ++++++++++++ + posix/Makefile | 16 +- + posix/bug-regex1.c | 20 ++ + posix/bug-regex19.c | 22 +- + posix/bug-regex4.c | 25 ++ + posix/bug-regex6.c | 2 +- + posix/transbug.c | 24 +- + posix/tst-fnmatch.input | 549 +++++++++++++++++++++++++++++++++- + posix/tst-regcomp-truncated.c | 1 + + posix/tst-regex.c | 33 +- + 15 files changed, 1122 insertions(+), 33 deletions(-) + create mode 100644 iconv/tst-iconv9.c + create mode 100644 localedata/C.UTF-8.in + create mode 100644 localedata/locales/C + +diff --git a/iconv/Makefile b/iconv/Makefile +index 07d77c9e..9993f2d3 100644 +--- a/iconv/Makefile ++++ b/iconv/Makefile +@@ -43,8 +43,19 @@ CFLAGS-charmap.c += -DCHARMAP_PATH='"$(i18ndir)/charmaps"' \ + CFLAGS-linereader.c += -DNO_TRANSLITERATION + CFLAGS-simple-hash.c += -I../locale + +-tests = tst-iconv1 tst-iconv2 tst-iconv3 tst-iconv4 tst-iconv5 tst-iconv6 \ +- tst-iconv7 tst-iconv8 tst-iconv-mt tst-iconv-opt ++tests = \ ++ tst-iconv1 \ ++ tst-iconv2 \ ++ tst-iconv3 \ ++ tst-iconv4 \ ++ tst-iconv5 \ ++ tst-iconv6 \ ++ tst-iconv7 \ ++ tst-iconv8 \ ++ tst-iconv9 \ ++ tst-iconv-mt \ ++ tst-iconv-opt \ ++ # tests + + others = iconv_prog iconvconfig + install-others-programs = $(inst_bindir)/iconv +@@ -83,10 +94,15 @@ endif + include ../Rules + + ifeq ($(run-built-tests),yes) +-LOCALES := en_US.UTF-8 ++# We have to generate locales (list sorted alphabetically) ++LOCALES := \ ++ C.UTF-8 \ ++ en_US.UTF-8 \ ++ # LOCALES + include ../gen-locales.mk + + $(objpfx)tst-iconv-opt.out: $(gen-locales) ++$(objpfx)tst-iconv9.out: $(gen-locales) + endif + + $(inst_bindir)/iconv: $(objpfx)iconv_prog $(+force) +diff 
--git a/iconv/tst-iconv9.c b/iconv/tst-iconv9.c +new file mode 100644 +index 00000000..c46b1833 +--- /dev/null ++++ b/iconv/tst-iconv9.c +@@ -0,0 +1,87 @@ ++/* Verify that using C.UTF-8 works. ++ ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* This test does two things: ++ (1) Verify that we have likely included translit_combining in C.UTF-8. ++ (2) Verify default_missing is '?' as expected. */ ++ ++/* ISO-8859-1 encoding of "für". */ ++char iso88591_in[] = { 0x66, 0xfc, 0x72, 0x0 }; ++/* ASCII transliteration is "fur" with C.UTF-8 translit_combining. */ ++char ascii_exp[] = { 0x66, 0x75, 0x72, 0x0 }; ++ ++/* First 3-byte UTF-8 code point. */ ++char utf8_in[] = { 0xe0, 0xa0, 0x80, 0x0 }; ++/* There is no ASCII transliteration for SAMARITAN LETTER ALAF ++ so we get default_missing used which is '?'. */ ++char default_missing_exp[] = { 0x3f, 0x0 }; ++ ++static int ++do_test (void) ++{ ++ char ascii_out[5]; ++ iconv_t cd; ++ char *inbuf; ++ char *outbuf; ++ size_t inbytes; ++ size_t outbytes; ++ size_t n; ++ ++ /* The C.UTF-8 locale should include translit_combining, which provides ++ the transliteration for "LATIN SMALL LETTER U WITH DIAERESIS" which ++ is not provided by locale/C-translit.h.in. 
*/ ++ xsetlocale (LC_ALL, "C.UTF-8"); ++ ++ /* From ISO-8859-1 to ASCII. */ ++ cd = iconv_open ("ASCII//TRANSLIT,IGNORE", "ISO-8859-1"); ++ TEST_VERIFY (cd != (iconv_t) -1); ++ inbuf = iso88591_in; ++ inbytes = 3; ++ outbuf = ascii_out; ++ outbytes = 3; ++ n = iconv (cd, &inbuf, &inbytes, &outbuf, &outbytes); ++ TEST_VERIFY (n != -1); ++ *outbuf = '\0'; ++ TEST_COMPARE_BLOB (ascii_out, 3, ascii_exp, 3); ++ TEST_VERIFY (iconv_close (cd) == 0); ++ ++ /* From UTF-8 to ASCII. */ ++ cd = iconv_open ("ASCII//TRANSLIT,IGNORE", "UTF-8"); ++ TEST_VERIFY (cd != (iconv_t) -1); ++ inbuf = utf8_in; ++ inbytes = 3; ++ outbuf = ascii_out; ++ outbytes = 3; ++ n = iconv (cd, &inbuf, &inbytes, &outbuf, &outbytes); ++ TEST_VERIFY (n != -1); ++ *outbuf = '\0'; ++ TEST_COMPARE_BLOB (ascii_out, 1, default_missing_exp, 1); ++ TEST_VERIFY (iconv_close (cd) == 0); ++ ++ return 0; ++} ++ ++#include +diff --git a/localedata/C.UTF-8.in b/localedata/C.UTF-8.in +new file mode 100644 +index 00000000..c31dcc2a +--- /dev/null ++++ b/localedata/C.UTF-8.in +@@ -0,0 +1,157 @@ ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++ ; ++! ; ++" ; ++# ; ++$ ; ++% ; ++& ; ++' ; ++) ; ++* ; +++ ; ++, ; ++- ; ++. ; ++/ ; ++0 ; ++1 ; ++2 ; ++3 ; ++4 ; ++5 ; ++6 ; ++7 ; ++8 ; ++9 ; ++< ; ++= ; ++> ; ++? 
; ++@ ; ++A ; ++B ; ++C ; ++D ; ++E ; ++F ; ++G ; ++H ; ++I ; ++J ; ++K ; ++L ; ++M ; ++N ; ++O ; ++P ; ++Q ; ++R ; ++S ; ++T ; ++U ; ++V ; ++W ; ++X ; ++Y ; ++Z ; ++[ ; ++\ ; ++] ; ++^ ; ++_ ; ++` ; ++a ; ++b ; ++c ; ++d ; ++e ; ++f ; ++g ; ++h ; ++i ; ++j ; ++k ; ++l ; ++m ; ++n ; ++o ; ++p ; ++q ; ++r ; ++s ; ++t ; ++u ; ++v ; ++w ; ++x ; ++y ; ++z ; ++{ ; ++| ; ++} ; ++~ ; ++ ; ++€ ; ++ÿ ; ++Ā ; ++࿿ ; ++က ; ++� ; ++￿ ; ++𐀀 ; ++🿿 ; ++𠀀 ; ++𯿿 ; ++𰀀 ; ++𿿾 ; ++񀀀 ; ++񏿿 ; ++񐀀 ; ++񟿿 ; ++񠀀 ; ++񯿿 ; ++񰀀 ; ++񿿿 ; ++򀀀 ; ++򏿿 ; ++򐀀 ; ++򟿿 ; ++򠀀 ; ++򯿿 ; ++򰀀 ; ++򿿿 ; ++󀀁 ; ++󏿌 ; ++󐀎 ; ++󟿿 ; ++󠀁 ; ++󯿿 ; ++󰀁 ; ++󿿿 ; ++􀀁 ; ++􏿿 ; +diff --git a/localedata/Makefile b/localedata/Makefile +index 2e899bf2..9887d89e 100644 +--- a/localedata/Makefile ++++ b/localedata/Makefile +@@ -47,6 +47,7 @@ test-input := \ + bg_BG.UTF-8 \ + br_FR.UTF-8 \ + bs_BA.UTF-8 \ ++ C.UTF-8 \ + ckb_IQ.UTF-8 \ + cmn_TW.UTF-8 \ + crh_UA.UTF-8 \ +@@ -206,6 +207,7 @@ LOCALES := \ + bg_BG.UTF-8 \ + br_FR.UTF-8 \ + bs_BA.UTF-8 \ ++ C.UTF-8 \ + ckb_IQ.UTF-8 \ + cmn_TW.UTF-8 \ + crh_UA.UTF-8 \ +diff --git a/localedata/SUPPORTED b/localedata/SUPPORTED +index 1ee5b5e8..d768aa47 100644 +--- a/localedata/SUPPORTED ++++ b/localedata/SUPPORTED +@@ -79,6 +79,7 @@ brx_IN/UTF-8 \ + bs_BA.UTF-8/UTF-8 \ + bs_BA/ISO-8859-2 \ + byn_ER/UTF-8 \ ++C.UTF-8/UTF-8 \ + ca_AD.UTF-8/UTF-8 \ + ca_AD/ISO-8859-15 \ + ca_ES.UTF-8/UTF-8 \ +diff --git a/localedata/locales/C b/localedata/locales/C +new file mode 100644 +index 00000000..ca801c79 +--- /dev/null ++++ b/localedata/locales/C +@@ -0,0 +1,194 @@ ++escape_char / ++comment_char % ++% Locale for C locale in UTF-8 ++ ++LC_IDENTIFICATION ++title "C locale" ++source "" ++address "" ++contact "" ++email "bug-glibc-locales@gnu.org" ++tel "" ++fax "" ++language "" ++territory "" ++revision "2.0" ++date "2020-06-28" ++category "i18n:2012";LC_IDENTIFICATION ++category "i18n:2012";LC_CTYPE ++category "i18n:2012";LC_COLLATE ++category "i18n:2012";LC_TIME ++category "i18n:2012";LC_NUMERIC ++category 
"i18n:2012";LC_MONETARY ++category "i18n:2012";LC_MESSAGES ++category "i18n:2012";LC_PAPER ++category "i18n:2012";LC_NAME ++category "i18n:2012";LC_ADDRESS ++category "i18n:2012";LC_TELEPHONE ++category "i18n:2012";LC_MEASUREMENT ++END LC_IDENTIFICATION ++ ++LC_CTYPE ++% Include only the i18n character type classes without any of the ++% transliteration that i18n uses by default. ++copy "i18n_ctype" ++ ++% Include the neutral transliterations. The builtin C and ++% POSIX locales have +1600 transliterations that are built into ++% the locales, and these are a superset of those. ++translit_start ++include "translit_neutral";"" ++% We must use '?' for default_missing because the transliteration ++% framework includes it directly into the output and so it must ++% be compatible with ASCII if that is the target character set. ++default_missing ++translit_end ++ ++% Include the transliterations that can convert combined characters. ++% These are generally expected by users. ++translit_start ++include "translit_combining";"" ++translit_end ++ ++END LC_CTYPE ++ ++LC_COLLATE ++% The keyword 'codepoint_collation' in any part of any LC_COLLATE ++% immediately discards all collation information and causes the ++% locale to use strcmp/wcscmp for collation comparison. This is ++% exactly what is needed for C (ASCII) or C.UTF-8. ++codepoint_collation ++END LC_COLLATE ++ ++LC_MONETARY ++ ++% This is the 14652 i18n fdcc-set definition for the LC_MONETARY ++% category (except for the int_curr_symbol and currency_symbol, they are ++% empty in the 14652 i18n fdcc-set definition and also empty in ++% glibc/locale/C-monetary.c.). ++int_curr_symbol "" ++currency_symbol "" ++mon_decimal_point "." 
++mon_thousands_sep "" ++mon_grouping -1 ++positive_sign "" ++negative_sign "-" ++int_frac_digits -1 ++frac_digits -1 ++p_cs_precedes -1 ++int_p_sep_by_space -1 ++p_sep_by_space -1 ++n_cs_precedes -1 ++int_n_sep_by_space -1 ++n_sep_by_space -1 ++p_sign_posn -1 ++n_sign_posn -1 ++% ++END LC_MONETARY ++ ++LC_NUMERIC ++% This is the POSIX Locale definition for ++% the LC_NUMERIC category. ++% ++decimal_point "." ++thousands_sep "" ++grouping -1 ++END LC_NUMERIC ++ ++LC_TIME ++% This is the POSIX Locale definition for the LC_TIME category with the ++% exception that time is per ISO 8601 and 24-hour. ++% ++% Abbreviated weekday names (%a) ++abday "Sun";"Mon";"Tue";"Wed";"Thu";"Fri";"Sat" ++ ++% Full weekday names (%A) ++day "Sunday";"Monday";"Tuesday";"Wednesday";"Thursday";/ ++ "Friday";"Saturday" ++ ++% Abbreviated month names (%b) ++abmon "Jan";"Feb";"Mar";"Apr";"May";"Jun";"Jul";"Aug";"Sep";/ ++ "Oct";"Nov";"Dec" ++ ++% Full month names (%B) ++mon "January";"February";"March";"April";"May";"June";"July";/ ++ "August";"September";"October";"November";"December" ++ ++% Week description, consists of three fields: ++% 1. Number of days in a week. ++% 2. Gregorian date that is a first weekday (19971130 for Sunday, 19971201 for Monday). ++% 3. The weekday number to be contained in the first week of the year. ++% ++% ISO 8601 conforming applications should use the values 7, 19971201 (a ++% Monday), and 4 (Thursday), respectively. 
++week 7;19971201;4 ++first_weekday 1 ++first_workday 2 ++ ++% Appropriate date and time representation (%c) ++d_t_fmt "%a %b %e %H:%M:%S %Y" ++ ++% Appropriate date representation (%x) ++d_fmt "%m/%d/%y" ++ ++% Appropriate time representation (%X) ++t_fmt "%H:%M:%S" ++ ++% Appropriate AM/PM time representation (%r) ++t_fmt_ampm "%I:%M:%S %p" ++ ++% Equivalent of AM/PM (%p) ++am_pm "AM";"PM" ++ ++% Appropriate date representation (date(1)) ++date_fmt "%a %b %e %H:%M:%S %Z %Y" ++END LC_TIME ++ ++LC_MESSAGES ++% This is the POSIX Locale definition for ++% the LC_MESSAGES category. ++% ++yesexpr "^[yY]" ++noexpr "^[nN]" ++yesstr "Yes" ++nostr "No" ++END LC_MESSAGES ++ ++LC_PAPER ++% This is the ISO/IEC 14652 "i18n" definition for ++% the LC_PAPER category. ++% (A4 paper, this is also used in the built in C/POSIX ++% locale in glibc/locale/C-paper.c) ++height 297 ++width 210 ++END LC_PAPER ++ ++LC_NAME ++% This is the ISO/IEC 14652 "i18n" definition for ++% the LC_NAME category. ++% (also used in the built in C/POSIX locale in glibc/locale/C-name.c) ++name_fmt "%p%t%g%t%m%t%f" ++END LC_NAME ++ ++LC_ADDRESS ++% This is the ISO/IEC 14652 "i18n" definition for ++% the LC_ADDRESS category. ++% (also used in the built in C/POSIX locale in glibc/locale/C-address.c) ++postal_fmt "%a%N%f%N%d%N%b%N%s %h %e %r%N%C-%z %T%N%c%N" ++END LC_ADDRESS ++ ++LC_TELEPHONE ++% This is the ISO/IEC 14652 "i18n" definition for ++% the LC_TELEPHONE category. ++% "+%c %a %l" ++tel_int_fmt "+%c %a %l" ++% (also used in the built in C/POSIX locale in glibc/locale/C-telephone.c) ++END LC_TELEPHONE ++ ++LC_MEASUREMENT ++% This is the ISO/IEC 14652 "i18n" definition for ++% the LC_MEASUREMENT category. 
++% (same as in the built in C/POSIX locale in glibc/locale/C-measurement.c) ++%metric ++measurement 1 ++END LC_MEASUREMENT +diff --git a/posix/Makefile b/posix/Makefile +index 09460a28..61fcdf01 100644 +--- a/posix/Makefile ++++ b/posix/Makefile +@@ -191,9 +191,19 @@ $(objpfx)wordexp-tst.out: wordexp-tst.sh $(objpfx)wordexp-test + $(evaluate-test) + endif + +-LOCALES := cs_CZ.UTF-8 da_DK.ISO-8859-1 de_DE.ISO-8859-1 de_DE.UTF-8 \ +- en_US.UTF-8 es_US.ISO-8859-1 es_US.UTF-8 ja_JP.EUC-JP tr_TR.UTF-8 \ +- cs_CZ.ISO-8859-2 ++LOCALES := \ ++ cs_CZ.ISO-8859-2 \ ++ cs_CZ.UTF-8 \ ++ C.UTF-8 \ ++ da_DK.ISO-8859-1 \ ++ de_DE.ISO-8859-1 \ ++ de_DE.UTF-8 \ ++ en_US.UTF-8 \ ++ es_US.ISO-8859-1 \ ++ es_US.UTF-8 \ ++ ja_JP.EUC-JP \ ++ tr_TR.UTF-8 \ ++ # LOCALES + include ../gen-locales.mk + + $(objpfx)bug-regex1.out: $(gen-locales) +diff --git a/posix/bug-regex1.c b/posix/bug-regex1.c +index 38eb5439..7e9f4ec4 100644 +--- a/posix/bug-regex1.c ++++ b/posix/bug-regex1.c +@@ -41,6 +41,26 @@ main (void) + puts (" -> OK"); + } + ++ puts ("in C.UTF-8 locale"); ++ setlocale (LC_ALL, "C.UTF-8"); ++ s = re_compile_pattern ("[an\371]*n", 7, ®ex); ++ if (s != NULL) ++ { ++ puts ("re_compile_pattern return non-NULL value"); ++ result = 1; ++ } ++ else ++ { ++ match = re_match (®ex, "an", 2, 0, ®s); ++ if (match != 2) ++ { ++ printf ("re_match returned %d, expected 2\n", match); ++ result = 1; ++ } ++ else ++ puts (" -> OK"); ++ } ++ + puts ("in de_DE.ISO-8859-1 locale"); + setlocale (LC_ALL, "de_DE.ISO-8859-1"); + s = re_compile_pattern ("[an]*n", 7, ®ex); +diff --git a/posix/bug-regex19.c b/posix/bug-regex19.c +index b3fee0a7..e00ff60a 100644 +--- a/posix/bug-regex19.c ++++ b/posix/bug-regex19.c +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + + #define BRE RE_SYNTAX_POSIX_BASIC + #define ERE RE_SYNTAX_POSIX_EXTENDED +@@ -407,8 +408,8 @@ do_mb_tests (const struct test_s *test) + return 0; + } + +-int +-main (void) ++static int ++do_test (void) + { + size_t i; + int ret = 0; 
+@@ -417,20 +418,17 @@ main (void) + + for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i) + { +- if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL) +- { +- puts ("setlocale de_DE.ISO-8859-1 failed"); +- ret = 1; +- } ++ xsetlocale (LC_ALL, "de_DE.ISO-8859-1"); + ret |= do_one_test (&tests[i], ""); +- if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL) +- { +- puts ("setlocale de_DE.UTF-8 failed"); +- ret = 1; +- } ++ xsetlocale (LC_ALL, "de_DE.UTF-8"); ++ ret |= do_one_test (&tests[i], "UTF-8 "); ++ ret |= do_mb_tests (&tests[i]); ++ xsetlocale (LC_ALL, "C.UTF-8"); + ret |= do_one_test (&tests[i], "UTF-8 "); + ret |= do_mb_tests (&tests[i]); + } + + return ret; + } ++ ++#include +diff --git a/posix/bug-regex4.c b/posix/bug-regex4.c +index 8d5ae115..6475833c 100644 +--- a/posix/bug-regex4.c ++++ b/posix/bug-regex4.c +@@ -32,8 +32,33 @@ main (void) + + memset (®ex, '\0', sizeof (regex)); + ++ printf ("INFO: Checking C.\n"); + setlocale (LC_ALL, "C"); + ++ s = re_compile_pattern ("ab[cde]", 7, ®ex); ++ if (s != NULL) ++ { ++ puts ("re_compile_pattern returned non-NULL value"); ++ result = 1; ++ } ++ else ++ { ++ match[0] = re_search_2 (®ex, "xyabez", 6, "", 0, 1, 5, NULL, 6); ++ match[1] = re_search_2 (®ex, NULL, 0, "abc", 3, 0, 3, NULL, 3); ++ match[2] = re_search_2 (®ex, "xya", 3, "bd", 2, 2, 3, NULL, 5); ++ if (match[0] != 2 || match[1] != 0 || match[2] != 2) ++ { ++ printf ("re_search_2 returned %d,%d,%d, expected 2,0,2\n", ++ match[0], match[1], match[2]); ++ result = 1; ++ } ++ else ++ puts (" -> OK"); ++ } ++ ++ printf ("INFO: Checking C.UTF-8.\n"); ++ setlocale (LC_ALL, "C.UTF-8"); ++ + s = re_compile_pattern ("ab[cde]", 7, ®ex); + if (s != NULL) + { +diff --git a/posix/bug-regex6.c b/posix/bug-regex6.c +index 2bdf2126..0929b69b 100644 +--- a/posix/bug-regex6.c ++++ b/posix/bug-regex6.c +@@ -30,7 +30,7 @@ main (int argc, char *argv[]) + regex_t re; + regmatch_t mat[10]; + int i, j, ret = 0; +- const char *locales[] = { "C", "de_DE.UTF-8" }; ++ const char 
*locales[] = { "C", "C.UTF-8", "de_DE.UTF-8" }; + const char *string = "http://www.regex.com/pattern/matching.html#intro"; + regmatch_t expect[10] = { + { 0, 48 }, { 0, 5 }, { 0, 4 }, { 5, 20 }, { 7, 20 }, { 20, 42 }, +diff --git a/posix/transbug.c b/posix/transbug.c +index d0983b4d..b240177c 100644 +--- a/posix/transbug.c ++++ b/posix/transbug.c +@@ -116,16 +116,32 @@ do_test (void) + static const char lower[] = "[[:lower:]]+"; + static const char upper[] = "[[:upper:]]+"; + struct re_registers regs[4]; ++ int result = 0; + ++#define CHECK(exp) \ ++ if (exp) { puts (#exp); result = 1; } ++ ++ printf ("INFO: Checking C.\n"); + setlocale (LC_ALL, "C"); + + (void) re_set_syntax (RE_SYNTAX_GNU_AWK); + +- int result; +-#define CHECK(exp) \ +- if (exp) { puts (#exp); result = 1; } ++ result |= run_test (lower, regs); ++ result |= run_test (upper, ®s[2]); ++ if (! result) ++ { ++ CHECK (regs[0].start[0] != regs[2].start[0]); ++ CHECK (regs[0].end[0] != regs[2].end[0]); ++ CHECK (regs[1].start[0] != regs[3].start[0]); ++ CHECK (regs[1].end[0] != regs[3].end[0]); ++ } ++ ++ printf ("INFO: Checking C.UTF-8.\n"); ++ setlocale (LC_ALL, "C.UTF-8"); ++ ++ (void) re_set_syntax (RE_SYNTAX_GNU_AWK); + +- result = run_test (lower, regs); ++ result |= run_test (lower, regs); + result |= run_test (upper, ®s[2]); + if (! 
result) + { +diff --git a/posix/tst-fnmatch.input b/posix/tst-fnmatch.input +index 67aac5aa..6ff53180 100644 +--- a/posix/tst-fnmatch.input ++++ b/posix/tst-fnmatch.input +@@ -472,6 +472,397 @@ C "\\" "[Z-\\]]" 0 + C "]" "[Z-\\]]" 0 + C "-" "[Z-\\]]" NOMATCH + ++# B.6 004(C) ++C.UTF-8 "!#%+,-./01234567889" "!#%+,-./01234567889" 0 ++C.UTF-8 ":;=@ABCDEFGHIJKLMNO" ":;=@ABCDEFGHIJKLMNO" 0 ++C.UTF-8 "PQRSTUVWXYZ]abcdefg" "PQRSTUVWXYZ]abcdefg" 0 ++C.UTF-8 "hijklmnopqrstuvwxyz" "hijklmnopqrstuvwxyz" 0 ++C.UTF-8 "^_{}~" "^_{}~" 0 ++ ++# B.6 005(C) ++C.UTF-8 "\"$&'()" "\\\"\\$\\&\\'\\(\\)" 0 ++C.UTF-8 "*?[\\`|" "\\*\\?\\[\\\\\\`\\|" 0 ++C.UTF-8 "<>" "\\<\\>" 0 ++ ++# B.6 006(C) ++C.UTF-8 "?*[" "[?*[][?*[][?*[]" 0 ++C.UTF-8 "a/b" "?/b" 0 ++ ++# B.6 007(C) ++C.UTF-8 "a/b" "a?b" 0 ++C.UTF-8 "a/b" "a/?" 0 ++C.UTF-8 "aa/b" "?/b" NOMATCH ++C.UTF-8 "aa/b" "a?b" NOMATCH ++C.UTF-8 "a/bb" "a/?" NOMATCH ++ ++# B.6 009(C) ++C.UTF-8 "abc" "[abc]" NOMATCH ++C.UTF-8 "x" "[abc]" NOMATCH ++C.UTF-8 "a" "[abc]" 0 ++C.UTF-8 "[" "[[abc]" 0 ++C.UTF-8 "a" "[][abc]" 0 ++C.UTF-8 "a]" "[]a]]" 0 ++ ++# B.6 010(C) ++C.UTF-8 "xyz" "[!abc]" NOMATCH ++C.UTF-8 "x" "[!abc]" 0 ++C.UTF-8 "a" "[!abc]" NOMATCH ++ ++# B.6 011(C) ++C.UTF-8 "]" "[][abc]" 0 ++C.UTF-8 "abc]" "[][abc]" NOMATCH ++C.UTF-8 "[]abc" "[][]abc" NOMATCH ++C.UTF-8 "]" "[!]]" NOMATCH ++C.UTF-8 "aa]" "[!]a]" NOMATCH ++C.UTF-8 "]" "[!a]" 0 ++C.UTF-8 "]]" "[!a]]" 0 ++ ++# B.6 012(C) ++C.UTF-8 "a" "[[.a.]]" 0 ++C.UTF-8 "-" "[[.-.]]" 0 ++C.UTF-8 "-" "[[.-.][.].]]" 0 ++C.UTF-8 "-" "[[.].][.-.]]" 0 ++C.UTF-8 "-" "[[.-.][=u=]]" 0 ++C.UTF-8 "-" "[[.-.][:alpha:]]" 0 ++C.UTF-8 "a" "[![.a.]]" NOMATCH ++ ++# B.6 013(C) ++C.UTF-8 "a" "[[.b.]]" NOMATCH ++C.UTF-8 "a" "[[.b.][.c.]]" NOMATCH ++C.UTF-8 "a" "[[.b.][=b=]]" NOMATCH ++ ++ ++# B.6 015(C) ++C.UTF-8 "a" "[[=a=]]" 0 ++C.UTF-8 "b" "[[=a=]b]" 0 ++C.UTF-8 "b" "[[=a=][=b=]]" 0 ++C.UTF-8 "a" "[[=a=][=b=]]" 0 ++C.UTF-8 "a" "[[=a=][.b.]]" 0 ++C.UTF-8 "a" "[[=a=][:digit:]]" 0 ++ ++# B.6 016(C) ++C.UTF-8 "=" 
"[[=a=]b]" NOMATCH ++C.UTF-8 "]" "[[=a=]b]" NOMATCH ++C.UTF-8 "a" "[[=b=][=c=]]" NOMATCH ++C.UTF-8 "a" "[[=b=][.].]]" NOMATCH ++C.UTF-8 "a" "[[=b=][:digit:]]" NOMATCH ++ ++# B.6 017(C) ++C.UTF-8 "a" "[[:alnum:]]" 0 ++C.UTF-8 "a" "[![:alnum:]]" NOMATCH ++C.UTF-8 "-" "[[:alnum:]]" NOMATCH ++C.UTF-8 "a]a" "[[:alnum:]]a" NOMATCH ++C.UTF-8 "-" "[[:alnum:]-]" 0 ++C.UTF-8 "aa" "[[:alnum:]]a" 0 ++C.UTF-8 "-" "[![:alnum:]]" 0 ++C.UTF-8 "]" "[!][:alnum:]]" NOMATCH ++C.UTF-8 "[" "[![:alnum:][]" NOMATCH ++C.UTF-8 "a" "[[:alnum:]]" 0 ++C.UTF-8 "b" "[[:alnum:]]" 0 ++C.UTF-8 "c" "[[:alnum:]]" 0 ++C.UTF-8 "d" "[[:alnum:]]" 0 ++C.UTF-8 "e" "[[:alnum:]]" 0 ++C.UTF-8 "f" "[[:alnum:]]" 0 ++C.UTF-8 "g" "[[:alnum:]]" 0 ++C.UTF-8 "h" "[[:alnum:]]" 0 ++C.UTF-8 "i" "[[:alnum:]]" 0 ++C.UTF-8 "j" "[[:alnum:]]" 0 ++C.UTF-8 "k" "[[:alnum:]]" 0 ++C.UTF-8 "l" "[[:alnum:]]" 0 ++C.UTF-8 "m" "[[:alnum:]]" 0 ++C.UTF-8 "n" "[[:alnum:]]" 0 ++C.UTF-8 "o" "[[:alnum:]]" 0 ++C.UTF-8 "p" "[[:alnum:]]" 0 ++C.UTF-8 "q" "[[:alnum:]]" 0 ++C.UTF-8 "r" "[[:alnum:]]" 0 ++C.UTF-8 "s" "[[:alnum:]]" 0 ++C.UTF-8 "t" "[[:alnum:]]" 0 ++C.UTF-8 "u" "[[:alnum:]]" 0 ++C.UTF-8 "v" "[[:alnum:]]" 0 ++C.UTF-8 "w" "[[:alnum:]]" 0 ++C.UTF-8 "x" "[[:alnum:]]" 0 ++C.UTF-8 "y" "[[:alnum:]]" 0 ++C.UTF-8 "z" "[[:alnum:]]" 0 ++C.UTF-8 "A" "[[:alnum:]]" 0 ++C.UTF-8 "B" "[[:alnum:]]" 0 ++C.UTF-8 "C" "[[:alnum:]]" 0 ++C.UTF-8 "D" "[[:alnum:]]" 0 ++C.UTF-8 "E" "[[:alnum:]]" 0 ++C.UTF-8 "F" "[[:alnum:]]" 0 ++C.UTF-8 "G" "[[:alnum:]]" 0 ++C.UTF-8 "H" "[[:alnum:]]" 0 ++C.UTF-8 "I" "[[:alnum:]]" 0 ++C.UTF-8 "J" "[[:alnum:]]" 0 ++C.UTF-8 "K" "[[:alnum:]]" 0 ++C.UTF-8 "L" "[[:alnum:]]" 0 ++C.UTF-8 "M" "[[:alnum:]]" 0 ++C.UTF-8 "N" "[[:alnum:]]" 0 ++C.UTF-8 "O" "[[:alnum:]]" 0 ++C.UTF-8 "P" "[[:alnum:]]" 0 ++C.UTF-8 "Q" "[[:alnum:]]" 0 ++C.UTF-8 "R" "[[:alnum:]]" 0 ++C.UTF-8 "S" "[[:alnum:]]" 0 ++C.UTF-8 "T" "[[:alnum:]]" 0 ++C.UTF-8 "U" "[[:alnum:]]" 0 ++C.UTF-8 "V" "[[:alnum:]]" 0 ++C.UTF-8 "W" "[[:alnum:]]" 0 ++C.UTF-8 "X" "[[:alnum:]]" 0 
++C.UTF-8 "Y" "[[:alnum:]]" 0 ++C.UTF-8 "Z" "[[:alnum:]]" 0 ++C.UTF-8 "0" "[[:alnum:]]" 0 ++C.UTF-8 "1" "[[:alnum:]]" 0 ++C.UTF-8 "2" "[[:alnum:]]" 0 ++C.UTF-8 "3" "[[:alnum:]]" 0 ++C.UTF-8 "4" "[[:alnum:]]" 0 ++C.UTF-8 "5" "[[:alnum:]]" 0 ++C.UTF-8 "6" "[[:alnum:]]" 0 ++C.UTF-8 "7" "[[:alnum:]]" 0 ++C.UTF-8 "8" "[[:alnum:]]" 0 ++C.UTF-8 "9" "[[:alnum:]]" 0 ++C.UTF-8 "!" "[[:alnum:]]" NOMATCH ++C.UTF-8 "#" "[[:alnum:]]" NOMATCH ++C.UTF-8 "%" "[[:alnum:]]" NOMATCH ++C.UTF-8 "+" "[[:alnum:]]" NOMATCH ++C.UTF-8 "," "[[:alnum:]]" NOMATCH ++C.UTF-8 "-" "[[:alnum:]]" NOMATCH ++C.UTF-8 "." "[[:alnum:]]" NOMATCH ++C.UTF-8 "/" "[[:alnum:]]" NOMATCH ++C.UTF-8 ":" "[[:alnum:]]" NOMATCH ++C.UTF-8 ";" "[[:alnum:]]" NOMATCH ++C.UTF-8 "=" "[[:alnum:]]" NOMATCH ++C.UTF-8 "@" "[[:alnum:]]" NOMATCH ++C.UTF-8 "[" "[[:alnum:]]" NOMATCH ++C.UTF-8 "\\" "[[:alnum:]]" NOMATCH ++C.UTF-8 "]" "[[:alnum:]]" NOMATCH ++C.UTF-8 "^" "[[:alnum:]]" NOMATCH ++C.UTF-8 "_" "[[:alnum:]]" NOMATCH ++C.UTF-8 "{" "[[:alnum:]]" NOMATCH ++C.UTF-8 "}" "[[:alnum:]]" NOMATCH ++C.UTF-8 "~" "[[:alnum:]]" NOMATCH ++C.UTF-8 "\"" "[[:alnum:]]" NOMATCH ++C.UTF-8 "$" "[[:alnum:]]" NOMATCH ++C.UTF-8 "&" "[[:alnum:]]" NOMATCH ++C.UTF-8 "'" "[[:alnum:]]" NOMATCH ++C.UTF-8 "(" "[[:alnum:]]" NOMATCH ++C.UTF-8 ")" "[[:alnum:]]" NOMATCH ++C.UTF-8 "*" "[[:alnum:]]" NOMATCH ++C.UTF-8 "?" 
"[[:alnum:]]" NOMATCH ++C.UTF-8 "`" "[[:alnum:]]" NOMATCH ++C.UTF-8 "|" "[[:alnum:]]" NOMATCH ++C.UTF-8 "<" "[[:alnum:]]" NOMATCH ++C.UTF-8 ">" "[[:alnum:]]" NOMATCH ++C.UTF-8 "\t" "[[:cntrl:]]" 0 ++C.UTF-8 "t" "[[:cntrl:]]" NOMATCH ++C.UTF-8 "t" "[[:lower:]]" 0 ++C.UTF-8 "\t" "[[:lower:]]" NOMATCH ++C.UTF-8 "T" "[[:lower:]]" NOMATCH ++C.UTF-8 "\t" "[[:space:]]" 0 ++C.UTF-8 "t" "[[:space:]]" NOMATCH ++C.UTF-8 "t" "[[:alpha:]]" 0 ++C.UTF-8 "\t" "[[:alpha:]]" NOMATCH ++C.UTF-8 "0" "[[:digit:]]" 0 ++C.UTF-8 "\t" "[[:digit:]]" NOMATCH ++C.UTF-8 "t" "[[:digit:]]" NOMATCH ++C.UTF-8 "\t" "[[:print:]]" NOMATCH ++C.UTF-8 "t" "[[:print:]]" 0 ++C.UTF-8 "T" "[[:upper:]]" 0 ++C.UTF-8 "\t" "[[:upper:]]" NOMATCH ++C.UTF-8 "t" "[[:upper:]]" NOMATCH ++C.UTF-8 "\t" "[[:blank:]]" 0 ++C.UTF-8 "t" "[[:blank:]]" NOMATCH ++C.UTF-8 "\t" "[[:graph:]]" NOMATCH ++C.UTF-8 "t" "[[:graph:]]" 0 ++C.UTF-8 "." "[[:punct:]]" 0 ++C.UTF-8 "t" "[[:punct:]]" NOMATCH ++C.UTF-8 "\t" "[[:punct:]]" NOMATCH ++C.UTF-8 "0" "[[:xdigit:]]" 0 ++C.UTF-8 "\t" "[[:xdigit:]]" NOMATCH ++C.UTF-8 "a" "[[:xdigit:]]" 0 ++C.UTF-8 "A" "[[:xdigit:]]" 0 ++C.UTF-8 "t" "[[:xdigit:]]" NOMATCH ++C.UTF-8 "a" "[[alpha]]" NOMATCH ++C.UTF-8 "a" "[[alpha:]]" NOMATCH ++C.UTF-8 "a]" "[[alpha]]" 0 ++C.UTF-8 "a]" "[[alpha:]]" 0 ++C.UTF-8 "a" "[[:alpha:][.b.]]" 0 ++C.UTF-8 "a" "[[:alpha:][=b=]]" 0 ++C.UTF-8 "a" "[[:alpha:][:digit:]]" 0 ++C.UTF-8 "a" "[[:digit:][:alpha:]]" 0 ++ ++# B.6 018(C) ++C.UTF-8 "a" "[a-c]" 0 ++C.UTF-8 "b" "[a-c]" 0 ++C.UTF-8 "c" "[a-c]" 0 ++C.UTF-8 "a" "[b-c]" NOMATCH ++C.UTF-8 "d" "[b-c]" NOMATCH ++C.UTF-8 "B" "[a-c]" NOMATCH ++C.UTF-8 "b" "[A-C]" NOMATCH ++C.UTF-8 "" "[a-c]" NOMATCH ++C.UTF-8 "as" "[a-ca-z]" NOMATCH ++C.UTF-8 "a" "[[.a.]-c]" 0 ++C.UTF-8 "a" "[a-[.c.]]" 0 ++C.UTF-8 "a" "[[.a.]-[.c.]]" 0 ++C.UTF-8 "b" "[[.a.]-c]" 0 ++C.UTF-8 "b" "[a-[.c.]]" 0 ++C.UTF-8 "b" "[[.a.]-[.c.]]" 0 ++C.UTF-8 "c" "[[.a.]-c]" 0 ++C.UTF-8 "c" "[a-[.c.]]" 0 ++C.UTF-8 "c" "[[.a.]-[.c.]]" 0 ++C.UTF-8 "d" "[[.a.]-c]" NOMATCH 
++C.UTF-8 "d" "[a-[.c.]]" NOMATCH ++C.UTF-8 "d" "[[.a.]-[.c.]]" NOMATCH ++ ++# B.6 019(C) ++C.UTF-8 "a" "[c-a]" NOMATCH ++C.UTF-8 "a" "[[.c.]-a]" NOMATCH ++C.UTF-8 "a" "[c-[.a.]]" NOMATCH ++C.UTF-8 "a" "[[.c.]-[.a.]]" NOMATCH ++C.UTF-8 "c" "[c-a]" NOMATCH ++C.UTF-8 "c" "[[.c.]-a]" NOMATCH ++C.UTF-8 "c" "[c-[.a.]]" NOMATCH ++C.UTF-8 "c" "[[.c.]-[.a.]]" NOMATCH ++ ++# B.6 020(C) ++C.UTF-8 "a" "[a-c0-9]" 0 ++C.UTF-8 "d" "[a-c0-9]" NOMATCH ++C.UTF-8 "B" "[a-c0-9]" NOMATCH ++ ++# B.6 021(C) ++C.UTF-8 "-" "[-a]" 0 ++C.UTF-8 "a" "[-b]" NOMATCH ++C.UTF-8 "-" "[!-a]" NOMATCH ++C.UTF-8 "a" "[!-b]" 0 ++C.UTF-8 "-" "[a-c-0-9]" 0 ++C.UTF-8 "b" "[a-c-0-9]" 0 ++C.UTF-8 "a:" "a[0-9-a]" NOMATCH ++C.UTF-8 "a:" "a[09-a]" 0 ++ ++# B.6 024(C) ++C.UTF-8 "" "*" 0 ++C.UTF-8 "asd/sdf" "*" 0 ++ ++# B.6 025(C) ++C.UTF-8 "as" "[a-c][a-z]" 0 ++C.UTF-8 "as" "??" 0 ++ ++# B.6 026(C) ++C.UTF-8 "asd/sdf" "as*df" 0 ++C.UTF-8 "asd/sdf" "as*" 0 ++C.UTF-8 "asd/sdf" "*df" 0 ++C.UTF-8 "asd/sdf" "as*dg" NOMATCH ++C.UTF-8 "asdf" "as*df" 0 ++C.UTF-8 "asdf" "as*df?" NOMATCH ++C.UTF-8 "asdf" "as*??" 0 ++C.UTF-8 "asdf" "a*???" 0 ++C.UTF-8 "asdf" "*????" 0 ++C.UTF-8 "asdf" "????*" 0 ++C.UTF-8 "asdf" "??*?" 0 ++ ++# B.6 027(C) ++C.UTF-8 "/" "/" 0 ++C.UTF-8 "/" "/*" 0 ++C.UTF-8 "/" "*/" 0 ++C.UTF-8 "/" "/?" NOMATCH ++C.UTF-8 "/" "?/" NOMATCH ++C.UTF-8 "/" "?" 0 ++C.UTF-8 "." "?" 0 ++C.UTF-8 "/." "??" 0 ++C.UTF-8 "/" "[!a-c]" 0 ++C.UTF-8 "." "[!a-c]" 0 ++ ++# B.6 029(C) ++C.UTF-8 "/" "/" 0 PATHNAME ++C.UTF-8 "//" "//" 0 PATHNAME ++C.UTF-8 "/.a" "/*" 0 PATHNAME ++C.UTF-8 "/.a" "/?a" 0 PATHNAME ++C.UTF-8 "/.a" "/[!a-z]a" 0 PATHNAME ++C.UTF-8 "/.a/.b" "/*/?b" 0 PATHNAME ++ ++# B.6 030(C) ++C.UTF-8 "/" "?" 
NOMATCH PATHNAME ++C.UTF-8 "/" "*" NOMATCH PATHNAME ++C.UTF-8 "a/b" "a?b" NOMATCH PATHNAME ++C.UTF-8 "/.a/.b" "/*b" NOMATCH PATHNAME ++ ++# B.6 031(C) ++C.UTF-8 "/$" "\\/\\$" 0 ++C.UTF-8 "/[" "\\/\\[" 0 ++C.UTF-8 "/[" "\\/[" 0 ++C.UTF-8 "/[]" "\\/\\[]" 0 ++ ++# B.6 032(C) ++C.UTF-8 "/$" "\\/\\$" NOMATCH NOESCAPE ++C.UTF-8 "/\\$" "\\/\\$" NOMATCH NOESCAPE ++C.UTF-8 "\\/\\$" "\\/\\$" 0 NOESCAPE ++ ++# B.6 033(C) ++C.UTF-8 ".asd" ".*" 0 PERIOD ++C.UTF-8 "/.asd" "*" 0 PERIOD ++C.UTF-8 "/as/.df" "*/?*f" 0 PERIOD ++C.UTF-8 "..asd" ".[!a-z]*" 0 PERIOD ++ ++# B.6 034(C) ++C.UTF-8 ".asd" "*" NOMATCH PERIOD ++C.UTF-8 ".asd" "?asd" NOMATCH PERIOD ++C.UTF-8 ".asd" "[!a-z]*" NOMATCH PERIOD ++ ++# B.6 035(C) ++C.UTF-8 "/." "/." 0 PATHNAME|PERIOD ++C.UTF-8 "/.a./.b." "/.*/.*" 0 PATHNAME|PERIOD ++C.UTF-8 "/.a./.b." "/.??/.??" 0 PATHNAME|PERIOD ++ ++# B.6 036(C) ++C.UTF-8 "/." "*" NOMATCH PATHNAME|PERIOD ++C.UTF-8 "/." "/*" NOMATCH PATHNAME|PERIOD ++C.UTF-8 "/." "/?" NOMATCH PATHNAME|PERIOD ++C.UTF-8 "/." "/[!a-z]" NOMATCH PATHNAME|PERIOD ++C.UTF-8 "/a./.b." "/*/*" NOMATCH PATHNAME|PERIOD ++C.UTF-8 "/a./.b." "/??/???" NOMATCH PATHNAME|PERIOD ++ ++# Some home-grown tests. 
++C.UTF-8 "foobar" "foo*[abc]z" NOMATCH ++C.UTF-8 "foobaz" "foo*[abc][xyz]" 0 ++C.UTF-8 "foobaz" "foo?*[abc][xyz]" 0 ++C.UTF-8 "foobaz" "foo?*[abc][x/yz]" 0 ++C.UTF-8 "foobaz" "foo?*[abc]/[xyz]" NOMATCH PATHNAME ++C.UTF-8 "a" "a/" NOMATCH PATHNAME ++C.UTF-8 "a/" "a" NOMATCH PATHNAME ++C.UTF-8 "//a" "/a" NOMATCH PATHNAME ++C.UTF-8 "/a" "//a" NOMATCH PATHNAME ++C.UTF-8 "az" "[a-]z" 0 ++C.UTF-8 "bz" "[ab-]z" 0 ++C.UTF-8 "cz" "[ab-]z" NOMATCH ++C.UTF-8 "-z" "[ab-]z" 0 ++C.UTF-8 "az" "[-a]z" 0 ++C.UTF-8 "bz" "[-ab]z" 0 ++C.UTF-8 "cz" "[-ab]z" NOMATCH ++C.UTF-8 "-z" "[-ab]z" 0 ++C.UTF-8 "\\" "[\\\\-a]" 0 ++C.UTF-8 "_" "[\\\\-a]" 0 ++C.UTF-8 "a" "[\\\\-a]" 0 ++C.UTF-8 "-" "[\\\\-a]" NOMATCH ++C.UTF-8 "\\" "[\\]-a]" NOMATCH ++C.UTF-8 "_" "[\\]-a]" 0 ++C.UTF-8 "a" "[\\]-a]" 0 ++C.UTF-8 "]" "[\\]-a]" 0 ++C.UTF-8 "-" "[\\]-a]" NOMATCH ++C.UTF-8 "\\" "[!\\\\-a]" NOMATCH ++C.UTF-8 "_" "[!\\\\-a]" NOMATCH ++C.UTF-8 "a" "[!\\\\-a]" NOMATCH ++C.UTF-8 "-" "[!\\\\-a]" 0 ++C.UTF-8 "!" "[\\!-]" 0 ++C.UTF-8 "-" "[\\!-]" 0 ++C.UTF-8 "\\" "[\\!-]" NOMATCH ++C.UTF-8 "Z" "[Z-\\\\]" 0 ++C.UTF-8 "[" "[Z-\\\\]" 0 ++C.UTF-8 "\\" "[Z-\\\\]" 0 ++C.UTF-8 "-" "[Z-\\\\]" NOMATCH ++C.UTF-8 "Z" "[Z-\\]]" 0 ++C.UTF-8 "[" "[Z-\\]]" 0 ++C.UTF-8 "\\" "[Z-\\]]" 0 ++C.UTF-8 "]" "[Z-\\]]" 0 ++C.UTF-8 "-" "[Z-\\]]" NOMATCH ++ + # Following are tests outside the scope of IEEE 2003.2 since they are using + # locales other than the C locale. The main focus of the tests is on the + # handling of ranges and the recognition of character (vs bytes). 
+@@ -677,7 +1068,6 @@ C "x/y" "*" 0 PATHNAME|LEADING_DIR + C "x/y/z" "*" 0 PATHNAME|LEADING_DIR + C "x" "*x" 0 PATHNAME|LEADING_DIR + +-en_US.UTF-8 "\366.csv" "*.csv" 0 + C "x/y" "*x" 0 PATHNAME|LEADING_DIR + C "x/y/z" "*x" 0 PATHNAME|LEADING_DIR + C "x" "x*" 0 PATHNAME|LEADING_DIR +@@ -693,6 +1083,33 @@ C "x" "x?y" NOMATCH PATHNAME|LEADING_DIR + C "x/y" "x?y" NOMATCH PATHNAME|LEADING_DIR + C "x/y/z" "x?y" NOMATCH PATHNAME|LEADING_DIR + ++# Duplicate the "Test of GNU extensions." tests but for C.UTF-8. ++C.UTF-8 "x" "x" 0 PATHNAME|LEADING_DIR ++C.UTF-8 "x/y" "x" 0 PATHNAME|LEADING_DIR ++C.UTF-8 "x/y/z" "x" 0 PATHNAME|LEADING_DIR ++C.UTF-8 "x" "*" 0 PATHNAME|LEADING_DIR ++C.UTF-8 "x/y" "*" 0 PATHNAME|LEADING_DIR ++C.UTF-8 "x/y/z" "*" 0 PATHNAME|LEADING_DIR ++C.UTF-8 "x" "*x" 0 PATHNAME|LEADING_DIR ++ ++C.UTF-8 "x/y" "*x" 0 PATHNAME|LEADING_DIR ++C.UTF-8 "x/y/z" "*x" 0 PATHNAME|LEADING_DIR ++C.UTF-8 "x" "x*" 0 PATHNAME|LEADING_DIR ++C.UTF-8 "x/y" "x*" 0 PATHNAME|LEADING_DIR ++C.UTF-8 "x/y/z" "x*" 0 PATHNAME|LEADING_DIR ++C.UTF-8 "x" "a" NOMATCH PATHNAME|LEADING_DIR ++C.UTF-8 "x/y" "a" NOMATCH PATHNAME|LEADING_DIR ++C.UTF-8 "x/y/z" "a" NOMATCH PATHNAME|LEADING_DIR ++C.UTF-8 "x" "x/y" NOMATCH PATHNAME|LEADING_DIR ++C.UTF-8 "x/y" "x/y" 0 PATHNAME|LEADING_DIR ++C.UTF-8 "x/y/z" "x/y" 0 PATHNAME|LEADING_DIR ++C.UTF-8 "x" "x?y" NOMATCH PATHNAME|LEADING_DIR ++C.UTF-8 "x/y" "x?y" NOMATCH PATHNAME|LEADING_DIR ++C.UTF-8 "x/y/z" "x?y" NOMATCH PATHNAME|LEADING_DIR ++ ++# Bug 14185 ++en_US.UTF-8 "\366.csv" "*.csv" 0 ++ + # ksh style matching. + C "abcd" "?@(a|b)*@(c)d" 0 EXTMATCH + C "/dev/udp/129.22.8.102/45" "/dev/@(tcp|udp)/*/*" 0 PATHNAME|EXTMATCH +@@ -822,3 +1239,133 @@ C "" "" 0 + C "" "" 0 EXTMATCH + C "" "*([abc])" 0 EXTMATCH + C "" "?([abc])" 0 EXTMATCH ++ ++# Duplicate the "ksh style matching." for C.UTF-8. 
++C.UTF-8 "abcd" "?@(a|b)*@(c)d" 0 EXTMATCH ++C.UTF-8 "/dev/udp/129.22.8.102/45" "/dev/@(tcp|udp)/*/*" 0 PATHNAME|EXTMATCH ++C.UTF-8 "12" "[1-9]*([0-9])" 0 EXTMATCH ++C.UTF-8 "12abc" "[1-9]*([0-9])" NOMATCH EXTMATCH ++C.UTF-8 "1" "[1-9]*([0-9])" 0 EXTMATCH ++C.UTF-8 "07" "+([0-7])" 0 EXTMATCH ++C.UTF-8 "0377" "+([0-7])" 0 EXTMATCH ++C.UTF-8 "09" "+([0-7])" NOMATCH EXTMATCH ++C.UTF-8 "paragraph" "para@(chute|graph)" 0 EXTMATCH ++C.UTF-8 "paramour" "para@(chute|graph)" NOMATCH EXTMATCH ++C.UTF-8 "para991" "para?([345]|99)1" 0 EXTMATCH ++C.UTF-8 "para381" "para?([345]|99)1" NOMATCH EXTMATCH ++C.UTF-8 "paragraph" "para*([0-9])" NOMATCH EXTMATCH ++C.UTF-8 "para" "para*([0-9])" 0 EXTMATCH ++C.UTF-8 "para13829383746592" "para*([0-9])" 0 EXTMATCH ++C.UTF-8 "paragraph" "para+([0-9])" NOMATCH EXTMATCH ++C.UTF-8 "para" "para+([0-9])" NOMATCH EXTMATCH ++C.UTF-8 "para987346523" "para+([0-9])" 0 EXTMATCH ++C.UTF-8 "paragraph" "para!(*.[0-9])" 0 EXTMATCH ++C.UTF-8 "para.38" "para!(*.[0-9])" 0 EXTMATCH ++C.UTF-8 "para.graph" "para!(*.[0-9])" 0 EXTMATCH ++C.UTF-8 "para39" "para!(*.[0-9])" 0 EXTMATCH ++C.UTF-8 "" "*(0|1|3|5|7|9)" 0 EXTMATCH ++C.UTF-8 "137577991" "*(0|1|3|5|7|9)" 0 EXTMATCH ++C.UTF-8 "2468" "*(0|1|3|5|7|9)" NOMATCH EXTMATCH ++C.UTF-8 "1358" "*(0|1|3|5|7|9)" NOMATCH EXTMATCH ++C.UTF-8 "file.c" "*.c?(c)" 0 EXTMATCH ++C.UTF-8 "file.C" "*.c?(c)" NOMATCH EXTMATCH ++C.UTF-8 "file.cc" "*.c?(c)" 0 EXTMATCH ++C.UTF-8 "file.ccc" "*.c?(c)" NOMATCH EXTMATCH ++C.UTF-8 "parse.y" "!(*.c|*.h|Makefile.in|config*|README)" 0 EXTMATCH ++C.UTF-8 "shell.c" "!(*.c|*.h|Makefile.in|config*|README)" NOMATCH EXTMATCH ++C.UTF-8 "Makefile" "!(*.c|*.h|Makefile.in|config*|README)" 0 EXTMATCH ++C.UTF-8 "VMS.FILE;1" "*\;[1-9]*([0-9])" 0 EXTMATCH ++C.UTF-8 "VMS.FILE;0" "*\;[1-9]*([0-9])" NOMATCH EXTMATCH ++C.UTF-8 "VMS.FILE;" "*\;[1-9]*([0-9])" NOMATCH EXTMATCH ++C.UTF-8 "VMS.FILE;139" "*\;[1-9]*([0-9])" 0 EXTMATCH ++C.UTF-8 "VMS.FILE;1N" "*\;[1-9]*([0-9])" NOMATCH EXTMATCH ++C.UTF-8 "abcfefg" 
"ab**(e|f)" 0 EXTMATCH ++C.UTF-8 "abcfefg" "ab**(e|f)g" 0 EXTMATCH ++C.UTF-8 "ab" "ab*+(e|f)" NOMATCH EXTMATCH ++C.UTF-8 "abef" "ab***ef" 0 EXTMATCH ++C.UTF-8 "abef" "ab**" 0 EXTMATCH ++C.UTF-8 "fofo" "*(f*(o))" 0 EXTMATCH ++C.UTF-8 "ffo" "*(f*(o))" 0 EXTMATCH ++C.UTF-8 "foooofo" "*(f*(o))" 0 EXTMATCH ++C.UTF-8 "foooofof" "*(f*(o))" 0 EXTMATCH ++C.UTF-8 "fooofoofofooo" "*(f*(o))" 0 EXTMATCH ++C.UTF-8 "foooofof" "*(f+(o))" NOMATCH EXTMATCH ++C.UTF-8 "xfoooofof" "*(f*(o))" NOMATCH EXTMATCH ++C.UTF-8 "foooofofx" "*(f*(o))" NOMATCH EXTMATCH ++C.UTF-8 "ofxoofxo" "*(*(of*(o)x)o)" 0 EXTMATCH ++C.UTF-8 "ofooofoofofooo" "*(f*(o))" NOMATCH EXTMATCH ++C.UTF-8 "foooxfooxfoxfooox" "*(f*(o)x)" 0 EXTMATCH ++C.UTF-8 "foooxfooxofoxfooox" "*(f*(o)x)" NOMATCH EXTMATCH ++C.UTF-8 "foooxfooxfxfooox" "*(f*(o)x)" 0 EXTMATCH ++C.UTF-8 "ofxoofxo" "*(*(of*(o)x)o)" 0 EXTMATCH ++C.UTF-8 "ofoooxoofxo" "*(*(of*(o)x)o)" 0 EXTMATCH ++C.UTF-8 "ofoooxoofxoofoooxoofxo" "*(*(of*(o)x)o)" 0 EXTMATCH ++C.UTF-8 "ofoooxoofxoofoooxoofxoo" "*(*(of*(o)x)o)" 0 EXTMATCH ++C.UTF-8 "ofoooxoofxoofoooxoofxofo" "*(*(of*(o)x)o)" NOMATCH EXTMATCH ++C.UTF-8 "ofoooxoofxoofoooxoofxooofxofxo" "*(*(of*(o)x)o)" 0 EXTMATCH ++C.UTF-8 "aac" "*(@(a))a@(c)" 0 EXTMATCH ++C.UTF-8 "ac" "*(@(a))a@(c)" 0 EXTMATCH ++C.UTF-8 "c" "*(@(a))a@(c)" NOMATCH EXTMATCH ++C.UTF-8 "aaac" "*(@(a))a@(c)" 0 EXTMATCH ++C.UTF-8 "baaac" "*(@(a))a@(c)" NOMATCH EXTMATCH ++C.UTF-8 "abcd" "?@(a|b)*@(c)d" 0 EXTMATCH ++C.UTF-8 "abcd" "@(ab|a*@(b))*(c)d" 0 EXTMATCH ++C.UTF-8 "acd" "@(ab|a*(b))*(c)d" 0 EXTMATCH ++C.UTF-8 "abbcd" "@(ab|a*(b))*(c)d" 0 EXTMATCH ++C.UTF-8 "effgz" "@(b+(c)d|e*(f)g?|?(h)i@(j|k))" 0 EXTMATCH ++C.UTF-8 "efgz" "@(b+(c)d|e*(f)g?|?(h)i@(j|k))" 0 EXTMATCH ++C.UTF-8 "egz" "@(b+(c)d|e*(f)g?|?(h)i@(j|k))" 0 EXTMATCH ++C.UTF-8 "egzefffgzbcdij" "*(b+(c)d|e*(f)g?|?(h)i@(j|k))" 0 EXTMATCH ++C.UTF-8 "egz" "@(b+(c)d|e+(f)g?|?(h)i@(j|k))" NOMATCH EXTMATCH ++C.UTF-8 "ofoofo" "*(of+(o))" 0 EXTMATCH ++C.UTF-8 "oxfoxoxfox" "*(oxf+(ox))" 0 EXTMATCH 
++C.UTF-8 "oxfoxfox" "*(oxf+(ox))" NOMATCH EXTMATCH ++C.UTF-8 "ofoofo" "*(of+(o)|f)" 0 EXTMATCH ++C.UTF-8 "foofoofo" "@(foo|f|fo)*(f|of+(o))" 0 EXTMATCH ++C.UTF-8 "oofooofo" "*(of|oof+(o))" 0 EXTMATCH ++C.UTF-8 "fffooofoooooffoofffooofff" "*(*(f)*(o))" 0 EXTMATCH ++C.UTF-8 "fofoofoofofoo" "*(fo|foo)" 0 EXTMATCH ++C.UTF-8 "foo" "!(x)" 0 EXTMATCH ++C.UTF-8 "foo" "!(x)*" 0 EXTMATCH ++C.UTF-8 "foo" "!(foo)" NOMATCH EXTMATCH ++C.UTF-8 "foo" "!(foo)*" 0 EXTMATCH ++C.UTF-8 "foobar" "!(foo)" 0 EXTMATCH ++C.UTF-8 "foobar" "!(foo)*" 0 EXTMATCH ++C.UTF-8 "moo.cow" "!(*.*).!(*.*)" 0 EXTMATCH ++C.UTF-8 "mad.moo.cow" "!(*.*).!(*.*)" NOMATCH EXTMATCH ++C.UTF-8 "mucca.pazza" "mu!(*(c))?.pa!(*(z))?" NOMATCH EXTMATCH ++C.UTF-8 "fff" "!(f)" 0 EXTMATCH ++C.UTF-8 "fff" "*(!(f))" 0 EXTMATCH ++C.UTF-8 "fff" "+(!(f))" 0 EXTMATCH ++C.UTF-8 "ooo" "!(f)" 0 EXTMATCH ++C.UTF-8 "ooo" "*(!(f))" 0 EXTMATCH ++C.UTF-8 "ooo" "+(!(f))" 0 EXTMATCH ++C.UTF-8 "foo" "!(f)" 0 EXTMATCH ++C.UTF-8 "foo" "*(!(f))" 0 EXTMATCH ++C.UTF-8 "foo" "+(!(f))" 0 EXTMATCH ++C.UTF-8 "f" "!(f)" NOMATCH EXTMATCH ++C.UTF-8 "f" "*(!(f))" NOMATCH EXTMATCH ++C.UTF-8 "f" "+(!(f))" NOMATCH EXTMATCH ++C.UTF-8 "foot" "@(!(z*)|*x)" 0 EXTMATCH ++C.UTF-8 "zoot" "@(!(z*)|*x)" NOMATCH EXTMATCH ++C.UTF-8 "foox" "@(!(z*)|*x)" 0 EXTMATCH ++C.UTF-8 "zoox" "@(!(z*)|*x)" 0 EXTMATCH ++C.UTF-8 "foo" "*(!(foo))" 0 EXTMATCH ++C.UTF-8 "foob" "!(foo)b*" NOMATCH EXTMATCH ++C.UTF-8 "foobb" "!(foo)b*" 0 EXTMATCH ++C.UTF-8 "[" "*([a[])" 0 EXTMATCH ++C.UTF-8 "]" "*([]a[])" 0 EXTMATCH ++C.UTF-8 "a" "*([]a[])" 0 EXTMATCH ++C.UTF-8 "b" "*([!]a[])" 0 EXTMATCH ++C.UTF-8 "[" "*([!]a[]|[[])" 0 EXTMATCH ++C.UTF-8 "]" "*([!]a[]|[]])" 0 EXTMATCH ++C.UTF-8 "[" "!([!]a[])" 0 EXTMATCH ++C.UTF-8 "]" "!([!]a[])" 0 EXTMATCH ++C.UTF-8 ")" "*([)])" 0 EXTMATCH ++C.UTF-8 "*" "*([*(])" 0 EXTMATCH ++C.UTF-8 "abcd" "*!(|a)cd" 0 EXTMATCH ++C.UTF-8 "ab/.a" "+([abc])/*" NOMATCH EXTMATCH|PATHNAME|PERIOD ++C.UTF-8 "" "" 0 ++C.UTF-8 "" "" 0 EXTMATCH ++C.UTF-8 "" "*([abc])" 0 
EXTMATCH ++C.UTF-8 "" "?([abc])" 0 EXTMATCH +diff --git a/posix/tst-regcomp-truncated.c b/posix/tst-regcomp-truncated.c +index 84195fcd..da3f9779 100644 +--- a/posix/tst-regcomp-truncated.c ++++ b/posix/tst-regcomp-truncated.c +@@ -37,6 +37,7 @@ + static const char locales[][17] = + { + "C", ++ "C.UTF-8", + "en_US.UTF-8", + "de_DE.ISO-8859-1", + }; +diff --git a/posix/tst-regex.c b/posix/tst-regex.c +index e7c2b05e..531128de 100644 +--- a/posix/tst-regex.c ++++ b/posix/tst-regex.c +@@ -32,6 +32,7 @@ + #include + #include + #include ++#include + + + #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0 +@@ -58,7 +59,7 @@ do_test (void) + const char *file; + int fd; + struct stat st; +- int result; ++ int result = 0; + char *inmem; + char *outmem; + size_t inlen; +@@ -123,7 +124,7 @@ do_test (void) + + /* Run the actual tests. All tests are run in a single-byte and a + multi-byte locale. */ +- result = test_expr ("[äáàâéèêíìîñöóòôüúùû]", 4, 4); ++ result |= test_expr ("[äáàâéèêíìîñöóòôüúùû]", 4, 4); + result |= test_expr ("G.ran", 2, 3); + result |= test_expr ("G.\\{1\\}ran", 2, 3); + result |= test_expr ("G.*ran", 3, 44); +@@ -143,19 +144,33 @@ do_test (void) + static int + test_expr (const char *expr, int expected, int expectedicase) + { +- int result; ++ int result = 0; + char *inmem; + char *outmem; + size_t inlen; + size_t outlen; + char *uexpr; + +- /* First test: search with an UTF-8 locale. */ +- if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL) +- error (EXIT_FAILURE, 0, "cannot set locale de_DE.UTF-8"); ++ /* First test: search with basic C.UTF-8 locale. 
*/ ++ printf ("INFO: Testing C.UTF-8.\n"); ++ xsetlocale (LC_ALL, "C.UTF-8"); + + printf ("\nTest \"%s\" with multi-byte locale\n", expr); +- result = run_test (expr, mem, memlen, 0, expected); ++ result |= run_test (expr, mem, memlen, 0, expected); ++ printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr); ++ result |= run_test (expr, mem, memlen, 1, expectedicase); ++ printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr); ++ result |= run_test_backwards (expr, mem, memlen, 0, expected); ++ printf ("\nTest \"%s\" backwards with multi-byte locale, case insensitive\n", ++ expr); ++ result |= run_test_backwards (expr, mem, memlen, 1, expectedicase); ++ ++ /* Second test: search with an UTF-8 locale. */ ++ printf ("INFO: Testing de_DE.UTF-8.\n"); ++ xsetlocale (LC_ALL, "de_DE.UTF-8"); ++ ++ printf ("\nTest \"%s\" with multi-byte locale\n", expr); ++ result |= run_test (expr, mem, memlen, 0, expected); + printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr); + result |= run_test (expr, mem, memlen, 1, expectedicase); + printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr); +@@ -165,8 +180,8 @@ test_expr (const char *expr, int expected, int expectedicase) + result |= run_test_backwards (expr, mem, memlen, 1, expectedicase); + + /* Second test: search with an ISO-8859-1 locale. 
*/ +- if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL) +- error (EXIT_FAILURE, 0, "cannot set locale de_DE.ISO-8859-1"); ++ printf ("INFO: Testing de_DE.ISO-8859-1.\n"); ++ xsetlocale (LC_ALL, "de_DE.ISO-8859-1"); + + inmem = (char *) expr; + inlen = strlen (expr); +-- +2.33.0 + diff --git a/backport-localedata-Adjust-C.UTF-8-to-align-with-C-POSIX.patch b/backport-localedata-Adjust-C.UTF-8-to-align-with-C-POSIX.patch new file mode 100644 index 0000000000000000000000000000000000000000..31a6cbb784939290dba2063e6b99cb1f724dc425 --- /dev/null +++ b/backport-localedata-Adjust-C.UTF-8-to-align-with-C-POSIX.patch @@ -0,0 +1,702 @@ +From 7e0ad15c0fbfe25435c1acd0ed3e9cedfbff2488 Mon Sep 17 00:00:00 2001 +From: Carlos O'Donell +Date: Mon, 31 Jan 2022 00:34:42 -0500 +Subject: [PATCH] localedata: Adjust C.UTF-8 to align with C/POSIX. + +We have had one downstream report from Canonical [1] that +an rrdtool test was broken by the differences in LC_TIME +that we had in the non-builtin C locale (C.UTF-8). If one +application has an issue there are going to be others, and +so with this commit we review and fix all the issues that +cause the builtin C locale to be different from C.UTF-8, +which includes: +* mon_decimal_point should be empty e.g. "" + - Depends on mon_decimal_point_wc fix. +* negative_sign should be empty e.g. "" +* week should be aligned with the builtin C/POSIX locale +* d_fmt corrected with escaped slashes e.g. "%m//%d//%y" +* yesstr and nostr should be empty e.g. "" +* country_ab2 and country_ab3 should be empty e.g. "" + +We bump LC_IDENTIFICATION version and adjust the date to +indicate the change in the locale. + +A new tst-c-utf8-consistency test is added to ensure +consistency between C/POSIX and C.UTF-8. + +Tested on x86_64 and i686 without regression. 
+ +[1] https://sourceware.org/pipermail/libc-alpha/2022-January/135703.html + +Co-authored-by: Florian Weimer +Reviewed-by: Florian Weimer + +Conflict:NA +Reference:https://sourceware.org/git/?p=glibc.git;a=commit;h=7e0ad15c0fbfe25435c1acd0ed3e9cedfbff2488 +--- + localedata/Makefile | 30 +- + localedata/locales/C | 22 +- + localedata/tst-c-utf8-consistency.c | 539 ++++++++++++++++++++++++++++ + 3 files changed, 578 insertions(+), 13 deletions(-) + create mode 100644 localedata/tst-c-utf8-consistency.c + +diff --git a/localedata/Makefile b/localedata/Makefile +index 79db713925..9ae2e5c161 100644 +--- a/localedata/Makefile ++++ b/localedata/Makefile +@@ -155,11 +155,31 @@ locale_test_suite := tst_iswalnum tst_iswalpha tst_iswcntrl \ + tst_wcsxfrm tst_wctob tst_wctomb tst_wctrans \ + tst_wctype tst_wcwidth + +-tests = $(locale_test_suite) tst-digits tst-setlocale bug-iconv-trans \ +- tst-leaks tst-mbswcs1 tst-mbswcs2 tst-mbswcs3 tst-mbswcs4 tst-mbswcs5 \ +- tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \ +- tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2 tst-setlocale3 \ +- tst-wctype tst-iconv-math-trans ++tests = \ ++ $(locale_test_suite) \ ++ bug-iconv-trans \ ++ bug-setlocale1 \ ++ bug-usesetlocale \ ++ tst-c-utf8-consistency \ ++ tst-digits \ ++ tst-iconv-math-trans \ ++ tst-leaks \ ++ tst-mbswcs1 \ ++ tst-mbswcs2 \ ++ tst-mbswcs3 \ ++ tst-mbswcs4 \ ++ tst-mbswcs5 \ ++ tst-mbswcs6 \ ++ tst-setlocale \ ++ tst-setlocale2 \ ++ tst-setlocale3 \ ++ tst-sscanf \ ++ tst-strfmon1 \ ++ tst-wctype \ ++ tst-xlocale1 \ ++ tst-xlocale2 \ ++ # tests ++ + tests-static = bug-setlocale1-static + tests += $(tests-static) + ifeq (yes,$(build-shared)) +diff --git a/localedata/locales/C b/localedata/locales/C +index ca801c79cf..fc0614e551 100644 +--- a/localedata/locales/C ++++ b/localedata/locales/C +@@ -12,8 +12,8 @@ tel "" + fax "" + language "" + territory "" +-revision "2.0" +-date "2020-06-28" ++revision "2.1" ++date "2022-01-30" + category 
"i18n:2012";LC_IDENTIFICATION + category "i18n:2012";LC_CTYPE + category "i18n:2012";LC_COLLATE +@@ -68,11 +68,11 @@ LC_MONETARY + % glibc/locale/C-monetary.c.). + int_curr_symbol "" + currency_symbol "" +-mon_decimal_point "." ++mon_decimal_point "" + mon_thousands_sep "" + mon_grouping -1 + positive_sign "" +-negative_sign "-" ++negative_sign "" + int_frac_digits -1 + frac_digits -1 + p_cs_precedes -1 +@@ -121,7 +121,9 @@ mon "January";"February";"March";"April";"May";"June";"July";/ + % + % ISO 8601 conforming applications should use the values 7, 19971201 (a + % Monday), and 4 (Thursday), respectively. +-week 7;19971201;4 ++% ++% This field is consciously aligned with the builtin C/POSIX locale. ++week 7;19971130;4 + first_weekday 1 + first_workday 2 + +@@ -129,7 +131,7 @@ first_workday 2 + d_t_fmt "%a %b %e %H:%M:%S %Y" + + % Appropriate date representation (%x) +-d_fmt "%m/%d/%y" ++d_fmt "%m//%d//%y" + + % Appropriate time representation (%X) + t_fmt "%H:%M:%S" +@@ -150,8 +152,8 @@ LC_MESSAGES + % + yesexpr "^[yY]" + noexpr "^[nN]" +-yesstr "Yes" +-nostr "No" ++yesstr "" ++nostr "" + END LC_MESSAGES + + LC_PAPER +@@ -175,6 +177,10 @@ LC_ADDRESS + % the LC_ADDRESS category. + % (also used in the built in C/POSIX locale in glibc/locale/C-address.c) + postal_fmt "%a%N%f%N%d%N%b%N%s %h %e %r%N%C-%z %T%N%c%N" ++% The abbreviated 2 char and 3 char should be set to empty strings to ++% match the C/POSIX locale. ++country_ab2 "" ++country_ab3 "" + END LC_ADDRESS + + LC_TELEPHONE +diff --git a/localedata/tst-c-utf8-consistency.c b/localedata/tst-c-utf8-consistency.c +new file mode 100644 +index 0000000000..50feed3090 +--- /dev/null ++++ b/localedata/tst-c-utf8-consistency.c +@@ -0,0 +1,539 @@ ++/* Test that C/POSIX and C.UTF-8 are consistent. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* Initialized by do_test using newlocale. */ ++static locale_t c_utf8; ++ ++/* Set to true for second pass. */ ++static bool use_nl_langinfo_l; ++ ++static void ++switch_to_c (void) ++{ ++ if (setlocale (LC_ALL, "C") == NULL) ++ FAIL_EXIT1 ("setlocale (LC_ALL, \"C\")"); ++} ++ ++static void ++switch_to_c_utf8 (void) ++{ ++ if (setlocale (LC_ALL, "C.UTF-8") == NULL) ++ FAIL_EXIT1 ("setlocale (LC_ALL, \"C.UTF-8\")"); ++} ++ ++static char * ++str (nl_item item) ++{ ++ if (!use_nl_langinfo_l) ++ switch_to_c (); ++ return nl_langinfo (item); ++} ++ ++static char * ++str_utf8 (nl_item item) ++{ ++ if (use_nl_langinfo_l) ++ return nl_langinfo_l (item, c_utf8); ++ else ++ { ++ switch_to_c_utf8 (); ++ return nl_langinfo (item); ++ } ++} ++ ++static wchar_t * ++wstr (nl_item item) ++{ ++ return (wchar_t *) str (item); ++} ++ ++static wchar_t * ++wstr_utf8 (nl_item item) ++{ ++ return (wchar_t *) str_utf8 (item); ++} ++ ++static int ++byte (nl_item item) ++{ ++ return (signed char) *str (item); ++} ++ ++static int ++byte_utf8 (nl_item item) ++{ ++ return (signed char) *str_utf8 (item); ++} ++ ++static int ++word (nl_item item) ++{ ++ union ++ { ++ char *ptr; ++ int word; ++ } u; ++ u.ptr = str (item); ++ return u.word; ++} ++ ++static int ++word_utf8 (nl_item item) 
++{ ++ union ++ { ++ char *ptr; ++ int word; ++ } u; ++ u.ptr = str_utf8 (item); ++ return u.word; ++} ++ ++static void ++one_pass (void) ++{ ++ /* LC_TIME. */ ++ TEST_COMPARE_STRING (str (ABDAY_1), str_utf8 (ABDAY_1)); ++ TEST_COMPARE_STRING (str (ABDAY_2), str_utf8 (ABDAY_2)); ++ TEST_COMPARE_STRING (str (ABDAY_3), str_utf8 (ABDAY_3)); ++ TEST_COMPARE_STRING (str (ABDAY_4), str_utf8 (ABDAY_4)); ++ TEST_COMPARE_STRING (str (ABDAY_5), str_utf8 (ABDAY_5)); ++ TEST_COMPARE_STRING (str (ABDAY_6), str_utf8 (ABDAY_6)); ++ TEST_COMPARE_STRING (str (ABDAY_7), str_utf8 (ABDAY_7)); ++ ++ TEST_COMPARE_STRING (str (DAY_1), str_utf8 (DAY_1)); ++ TEST_COMPARE_STRING (str (DAY_2), str_utf8 (DAY_2)); ++ TEST_COMPARE_STRING (str (DAY_3), str_utf8 (DAY_3)); ++ TEST_COMPARE_STRING (str (DAY_4), str_utf8 (DAY_4)); ++ TEST_COMPARE_STRING (str (DAY_5), str_utf8 (DAY_5)); ++ TEST_COMPARE_STRING (str (DAY_6), str_utf8 (DAY_6)); ++ TEST_COMPARE_STRING (str (DAY_7), str_utf8 (DAY_7)); ++ ++ TEST_COMPARE_STRING (str (ABMON_1), str_utf8 (ABMON_1)); ++ TEST_COMPARE_STRING (str (ABMON_2), str_utf8 (ABMON_2)); ++ TEST_COMPARE_STRING (str (ABMON_3), str_utf8 (ABMON_3)); ++ TEST_COMPARE_STRING (str (ABMON_4), str_utf8 (ABMON_4)); ++ TEST_COMPARE_STRING (str (ABMON_5), str_utf8 (ABMON_5)); ++ TEST_COMPARE_STRING (str (ABMON_6), str_utf8 (ABMON_6)); ++ TEST_COMPARE_STRING (str (ABMON_7), str_utf8 (ABMON_7)); ++ TEST_COMPARE_STRING (str (ABMON_8), str_utf8 (ABMON_8)); ++ TEST_COMPARE_STRING (str (ABMON_9), str_utf8 (ABMON_9)); ++ TEST_COMPARE_STRING (str (ABMON_10), str_utf8 (ABMON_10)); ++ TEST_COMPARE_STRING (str (ABMON_11), str_utf8 (ABMON_11)); ++ TEST_COMPARE_STRING (str (ABMON_12), str_utf8 (ABMON_12)); ++ ++ TEST_COMPARE_STRING (str (MON_1), str_utf8 (MON_1)); ++ TEST_COMPARE_STRING (str (MON_2), str_utf8 (MON_2)); ++ TEST_COMPARE_STRING (str (MON_3), str_utf8 (MON_3)); ++ TEST_COMPARE_STRING (str (MON_4), str_utf8 (MON_4)); ++ TEST_COMPARE_STRING (str (MON_5), str_utf8 (MON_5)); ++ 
TEST_COMPARE_STRING (str (MON_6), str_utf8 (MON_6)); ++ TEST_COMPARE_STRING (str (MON_7), str_utf8 (MON_7)); ++ TEST_COMPARE_STRING (str (MON_8), str_utf8 (MON_8)); ++ TEST_COMPARE_STRING (str (MON_9), str_utf8 (MON_9)); ++ TEST_COMPARE_STRING (str (MON_10), str_utf8 (MON_10)); ++ TEST_COMPARE_STRING (str (MON_11), str_utf8 (MON_11)); ++ TEST_COMPARE_STRING (str (MON_12), str_utf8 (MON_12)); ++ ++ TEST_COMPARE_STRING (str (AM_STR), str_utf8 (AM_STR)); ++ TEST_COMPARE_STRING (str (PM_STR), str_utf8 (PM_STR)); ++ ++ TEST_COMPARE_STRING (str (D_T_FMT), str_utf8 (D_T_FMT)); ++ TEST_COMPARE_STRING (str (D_FMT), str_utf8 (D_FMT)); ++ TEST_COMPARE_STRING (str (T_FMT), str_utf8 (T_FMT)); ++ TEST_COMPARE_STRING (str (T_FMT_AMPM), ++ str_utf8 (T_FMT_AMPM)); ++ ++ TEST_COMPARE_STRING (str (ERA), str_utf8 (ERA)); ++ TEST_COMPARE_STRING (str (ERA_YEAR), str_utf8 (ERA_YEAR)); ++ TEST_COMPARE_STRING (str (ERA_D_FMT), str_utf8 (ERA_D_FMT)); ++ TEST_COMPARE_STRING (str (ALT_DIGITS), str_utf8 (ALT_DIGITS)); ++ TEST_COMPARE_STRING (str (ERA_D_T_FMT), str_utf8 (ERA_D_T_FMT)); ++ TEST_COMPARE_STRING (str (ERA_T_FMT), str_utf8 (ERA_T_FMT)); ++ TEST_COMPARE (word (_NL_TIME_ERA_NUM_ENTRIES), ++ word_utf8 (_NL_TIME_ERA_NUM_ENTRIES)); ++ /* No array elements, so nothing to compare for _NL_TIME_ERA_ENTRIES. 
*/ ++ TEST_COMPARE (word (_NL_TIME_ERA_NUM_ENTRIES), 0); ++ ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABDAY_1), wstr_utf8 (_NL_WABDAY_1)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABDAY_2), wstr_utf8 (_NL_WABDAY_2)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABDAY_3), wstr_utf8 (_NL_WABDAY_3)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABDAY_4), wstr_utf8 (_NL_WABDAY_4)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABDAY_5), wstr_utf8 (_NL_WABDAY_5)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABDAY_6), wstr_utf8 (_NL_WABDAY_6)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABDAY_7), wstr_utf8 (_NL_WABDAY_7)); ++ ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WDAY_1), wstr_utf8 (_NL_WDAY_1)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WDAY_2), wstr_utf8 (_NL_WDAY_2)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WDAY_3), wstr_utf8 (_NL_WDAY_3)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WDAY_4), wstr_utf8 (_NL_WDAY_4)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WDAY_5), wstr_utf8 (_NL_WDAY_5)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WDAY_6), wstr_utf8 (_NL_WDAY_6)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WDAY_7), wstr_utf8 (_NL_WDAY_7)); ++ ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABMON_1), wstr_utf8 (_NL_WABMON_1)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABMON_2), wstr_utf8 (_NL_WABMON_2)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABMON_3), wstr_utf8 (_NL_WABMON_3)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABMON_4), wstr_utf8 (_NL_WABMON_4)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABMON_5), wstr_utf8 (_NL_WABMON_5)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABMON_6), wstr_utf8 (_NL_WABMON_6)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABMON_7), wstr_utf8 (_NL_WABMON_7)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABMON_8), wstr_utf8 (_NL_WABMON_8)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABMON_9), wstr_utf8 (_NL_WABMON_9)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABMON_10), wstr_utf8 (_NL_WABMON_10)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABMON_11), wstr_utf8 (_NL_WABMON_11)); ++ TEST_COMPARE_STRING_WIDE 
(wstr (_NL_WABMON_12), wstr_utf8 (_NL_WABMON_12)); ++ ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WMON_1), wstr_utf8 (_NL_WMON_1)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WMON_2), wstr_utf8 (_NL_WMON_2)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WMON_3), wstr_utf8 (_NL_WMON_3)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WMON_4), wstr_utf8 (_NL_WMON_4)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WMON_5), wstr_utf8 (_NL_WMON_5)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WMON_6), wstr_utf8 (_NL_WMON_6)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WMON_7), wstr_utf8 (_NL_WMON_7)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WMON_8), wstr_utf8 (_NL_WMON_8)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WMON_9), wstr_utf8 (_NL_WMON_9)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WMON_10), wstr_utf8 (_NL_WMON_10)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WMON_11), wstr_utf8 (_NL_WMON_11)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WMON_12), wstr_utf8 (_NL_WMON_12)); ++ ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WAM_STR), wstr_utf8 (_NL_WAM_STR)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WPM_STR), wstr_utf8 (_NL_WPM_STR)); ++ ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WD_T_FMT), wstr_utf8 (_NL_WD_T_FMT)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WD_FMT), wstr_utf8 (_NL_WD_FMT)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WT_FMT), wstr_utf8 (_NL_WT_FMT)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WT_FMT_AMPM), ++ wstr_utf8 (_NL_WT_FMT_AMPM)); ++ ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WERA_YEAR), wstr_utf8 (_NL_WERA_YEAR)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WERA_D_FMT), wstr_utf8 (_NL_WERA_D_FMT)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WALT_DIGITS), ++ wstr_utf8 (_NL_WALT_DIGITS)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WERA_D_T_FMT), ++ wstr_utf8 (_NL_WERA_D_T_FMT)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WERA_T_FMT), wstr_utf8 (_NL_WERA_T_FMT)); ++ ++ /* This is somewhat inconsistent, but see locale/categories.def. 
*/ ++ TEST_COMPARE (byte (_NL_TIME_WEEK_NDAYS), byte_utf8 (_NL_TIME_WEEK_NDAYS)); ++ TEST_COMPARE (word (_NL_TIME_WEEK_1STDAY), ++ word_utf8 (_NL_TIME_WEEK_1STDAY)); ++ TEST_COMPARE (byte (_NL_TIME_WEEK_1STWEEK), ++ byte_utf8 (_NL_TIME_WEEK_1STWEEK)); ++ TEST_COMPARE (byte (_NL_TIME_FIRST_WEEKDAY), ++ byte_utf8 (_NL_TIME_FIRST_WEEKDAY)); ++ TEST_COMPARE (byte (_NL_TIME_FIRST_WORKDAY), ++ byte_utf8 (_NL_TIME_FIRST_WORKDAY)); ++ TEST_COMPARE (byte (_NL_TIME_CAL_DIRECTION), ++ byte_utf8 (_NL_TIME_CAL_DIRECTION)); ++ TEST_COMPARE_STRING (str (_NL_TIME_TIMEZONE), str_utf8 (_NL_TIME_TIMEZONE)); ++ ++ TEST_COMPARE_STRING (str (_DATE_FMT), str_utf8 (_DATE_FMT)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_W_DATE_FMT), wstr_utf8 (_NL_W_DATE_FMT)); ++ ++ /* Expected difference. */ ++ TEST_COMPARE_STRING (str (_NL_TIME_CODESET), "ANSI_X3.4-1968"); ++ TEST_COMPARE_STRING (str_utf8 (_NL_TIME_CODESET), "UTF-8"); ++ ++ TEST_COMPARE_STRING (str (ALTMON_1), str_utf8 (ALTMON_1)); ++ TEST_COMPARE_STRING (str (ALTMON_2), str_utf8 (ALTMON_2)); ++ TEST_COMPARE_STRING (str (ALTMON_3), str_utf8 (ALTMON_3)); ++ TEST_COMPARE_STRING (str (ALTMON_4), str_utf8 (ALTMON_4)); ++ TEST_COMPARE_STRING (str (ALTMON_5), str_utf8 (ALTMON_5)); ++ TEST_COMPARE_STRING (str (ALTMON_6), str_utf8 (ALTMON_6)); ++ TEST_COMPARE_STRING (str (ALTMON_7), str_utf8 (ALTMON_7)); ++ TEST_COMPARE_STRING (str (ALTMON_8), str_utf8 (ALTMON_8)); ++ TEST_COMPARE_STRING (str (ALTMON_9), str_utf8 (ALTMON_9)); ++ TEST_COMPARE_STRING (str (ALTMON_10), str_utf8 (ALTMON_10)); ++ TEST_COMPARE_STRING (str (ALTMON_11), str_utf8 (ALTMON_11)); ++ TEST_COMPARE_STRING (str (ALTMON_12), str_utf8 (ALTMON_12)); ++ ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WALTMON_1), wstr_utf8 (_NL_WALTMON_1)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WALTMON_2), wstr_utf8 (_NL_WALTMON_2)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WALTMON_3), wstr_utf8 (_NL_WALTMON_3)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WALTMON_4), wstr_utf8 (_NL_WALTMON_4)); ++ 
TEST_COMPARE_STRING_WIDE (wstr (_NL_WALTMON_5), wstr_utf8 (_NL_WALTMON_5)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WALTMON_6), wstr_utf8 (_NL_WALTMON_6)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WALTMON_7), wstr_utf8 (_NL_WALTMON_7)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WALTMON_8), wstr_utf8 (_NL_WALTMON_8)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WALTMON_9), wstr_utf8 (_NL_WALTMON_9)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WALTMON_10), wstr_utf8 (_NL_WALTMON_10)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WALTMON_11), wstr_utf8 (_NL_WALTMON_11)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WALTMON_12), wstr_utf8 (_NL_WALTMON_12)); ++ ++ TEST_COMPARE_STRING (str (_NL_ABALTMON_1), str_utf8 (_NL_ABALTMON_1)); ++ TEST_COMPARE_STRING (str (_NL_ABALTMON_2), str_utf8 (_NL_ABALTMON_2)); ++ TEST_COMPARE_STRING (str (_NL_ABALTMON_3), str_utf8 (_NL_ABALTMON_3)); ++ TEST_COMPARE_STRING (str (_NL_ABALTMON_4), str_utf8 (_NL_ABALTMON_4)); ++ TEST_COMPARE_STRING (str (_NL_ABALTMON_5), str_utf8 (_NL_ABALTMON_5)); ++ TEST_COMPARE_STRING (str (_NL_ABALTMON_6), str_utf8 (_NL_ABALTMON_6)); ++ TEST_COMPARE_STRING (str (_NL_ABALTMON_7), str_utf8 (_NL_ABALTMON_7)); ++ TEST_COMPARE_STRING (str (_NL_ABALTMON_8), str_utf8 (_NL_ABALTMON_8)); ++ TEST_COMPARE_STRING (str (_NL_ABALTMON_9), str_utf8 (_NL_ABALTMON_9)); ++ TEST_COMPARE_STRING (str (_NL_ABALTMON_10), str_utf8 (_NL_ABALTMON_10)); ++ TEST_COMPARE_STRING (str (_NL_ABALTMON_11), str_utf8 (_NL_ABALTMON_11)); ++ TEST_COMPARE_STRING (str (_NL_ABALTMON_12), str_utf8 (_NL_ABALTMON_12)); ++ ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABALTMON_1), ++ wstr_utf8 (_NL_WABALTMON_1)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABALTMON_2), ++ wstr_utf8 (_NL_WABALTMON_2)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABALTMON_3), ++ wstr_utf8 (_NL_WABALTMON_3)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABALTMON_4), ++ wstr_utf8 (_NL_WABALTMON_4)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABALTMON_5), ++ wstr_utf8 (_NL_WABALTMON_5)); ++ TEST_COMPARE_STRING_WIDE (wstr 
(_NL_WABALTMON_6), ++ wstr_utf8 (_NL_WABALTMON_6)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABALTMON_7), ++ wstr_utf8 (_NL_WABALTMON_7)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABALTMON_8), ++ wstr_utf8 (_NL_WABALTMON_8)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABALTMON_9), ++ wstr_utf8 (_NL_WABALTMON_9)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABALTMON_10), ++ wstr_utf8 (_NL_WABALTMON_10)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABALTMON_11), ++ wstr_utf8 (_NL_WABALTMON_11)); ++ TEST_COMPARE_STRING_WIDE (wstr (_NL_WABALTMON_12), ++ wstr_utf8 (_NL_WABALTMON_12)); ++ ++ /* LC_COLLATE. Mostly untested, only expected differences. */ ++ TEST_COMPARE_STRING (str (_NL_COLLATE_CODESET), "ANSI_X3.4-1968"); ++ TEST_COMPARE_STRING (str_utf8 (_NL_COLLATE_CODESET), "UTF-8"); ++ ++ /* LC_CTYPE. Mostly untested, only expected differences. */ ++ TEST_COMPARE_STRING (str (CODESET), "ANSI_X3.4-1968"); ++ TEST_COMPARE_STRING (str_utf8 (CODESET), "UTF-8"); ++ ++ /* LC_MONETARY. */ ++ TEST_COMPARE_STRING (str (INT_CURR_SYMBOL), str_utf8 (INT_CURR_SYMBOL)); ++ TEST_COMPARE_STRING (str (CURRENCY_SYMBOL), str_utf8 (CURRENCY_SYMBOL)); ++ TEST_COMPARE_STRING (str (MON_DECIMAL_POINT), str_utf8 (MON_DECIMAL_POINT)); ++ TEST_COMPARE_STRING (str (MON_THOUSANDS_SEP), str_utf8 (MON_THOUSANDS_SEP)); ++ TEST_COMPARE_STRING (str (MON_GROUPING), str_utf8 (MON_GROUPING)); ++ TEST_COMPARE_STRING (str (POSITIVE_SIGN), str_utf8 (POSITIVE_SIGN)); ++ TEST_COMPARE_STRING (str (NEGATIVE_SIGN), str_utf8 (NEGATIVE_SIGN)); ++ TEST_COMPARE (byte (INT_FRAC_DIGITS), byte_utf8 (INT_FRAC_DIGITS)); ++ TEST_COMPARE (byte (FRAC_DIGITS), byte_utf8 (FRAC_DIGITS)); ++ TEST_COMPARE (byte (P_CS_PRECEDES), byte_utf8 (P_CS_PRECEDES)); ++ TEST_COMPARE (byte (P_SEP_BY_SPACE), byte_utf8 (P_SEP_BY_SPACE)); ++ TEST_COMPARE (byte (N_CS_PRECEDES), byte_utf8 (N_CS_PRECEDES)); ++ TEST_COMPARE (byte (N_SEP_BY_SPACE), byte_utf8 (N_SEP_BY_SPACE)); ++ TEST_COMPARE (byte (P_SIGN_POSN), byte_utf8 (P_SIGN_POSN)); ++ TEST_COMPARE (byte 
(N_SIGN_POSN), byte_utf8 (N_SIGN_POSN)); ++ TEST_COMPARE_STRING (str (CRNCYSTR), str_utf8 (CRNCYSTR)); ++ TEST_COMPARE (byte (INT_P_CS_PRECEDES), byte_utf8 (INT_P_CS_PRECEDES)); ++ TEST_COMPARE (byte (INT_P_SEP_BY_SPACE), byte_utf8 (INT_P_SEP_BY_SPACE)); ++ TEST_COMPARE (byte (INT_N_CS_PRECEDES), byte_utf8 (INT_N_CS_PRECEDES)); ++ TEST_COMPARE (byte (INT_N_SEP_BY_SPACE), byte_utf8 (INT_N_SEP_BY_SPACE)); ++ TEST_COMPARE (byte (INT_P_SIGN_POSN), byte_utf8 (INT_P_SIGN_POSN)); ++ TEST_COMPARE (byte (INT_N_SIGN_POSN), byte_utf8 (INT_N_SIGN_POSN)); ++ TEST_COMPARE_STRING (str (_NL_MONETARY_DUO_INT_CURR_SYMBOL), ++ str_utf8 (_NL_MONETARY_DUO_INT_CURR_SYMBOL)); ++ TEST_COMPARE_STRING (str (_NL_MONETARY_DUO_CURRENCY_SYMBOL), ++ str_utf8 (_NL_MONETARY_DUO_CURRENCY_SYMBOL)); ++ TEST_COMPARE (byte (_NL_MONETARY_DUO_INT_FRAC_DIGITS), ++ byte_utf8 (_NL_MONETARY_DUO_INT_FRAC_DIGITS)); ++ TEST_COMPARE (byte (_NL_MONETARY_DUO_FRAC_DIGITS), ++ byte_utf8 (_NL_MONETARY_DUO_FRAC_DIGITS)); ++ TEST_COMPARE (byte (_NL_MONETARY_DUO_P_CS_PRECEDES), ++ byte_utf8 (_NL_MONETARY_DUO_P_CS_PRECEDES)); ++ TEST_COMPARE (byte (_NL_MONETARY_DUO_P_SEP_BY_SPACE), ++ byte_utf8 (_NL_MONETARY_DUO_P_SEP_BY_SPACE)); ++ TEST_COMPARE (byte (_NL_MONETARY_DUO_N_CS_PRECEDES), ++ byte_utf8 (_NL_MONETARY_DUO_N_CS_PRECEDES)); ++ TEST_COMPARE (byte (_NL_MONETARY_DUO_N_SEP_BY_SPACE), ++ byte_utf8 (_NL_MONETARY_DUO_N_SEP_BY_SPACE)); ++ TEST_COMPARE (byte (_NL_MONETARY_DUO_INT_P_CS_PRECEDES), ++ byte_utf8 (_NL_MONETARY_DUO_INT_P_CS_PRECEDES)); ++ TEST_COMPARE (byte (_NL_MONETARY_DUO_INT_P_SEP_BY_SPACE), ++ byte_utf8 (_NL_MONETARY_DUO_INT_P_SEP_BY_SPACE)); ++ TEST_COMPARE (byte (_NL_MONETARY_DUO_INT_N_CS_PRECEDES), ++ byte_utf8 (_NL_MONETARY_DUO_INT_N_CS_PRECEDES)); ++ TEST_COMPARE (byte (_NL_MONETARY_DUO_INT_N_SEP_BY_SPACE), ++ byte_utf8 (_NL_MONETARY_DUO_INT_N_SEP_BY_SPACE)); ++ TEST_COMPARE (byte (_NL_MONETARY_DUO_INT_P_SIGN_POSN), ++ byte_utf8 (_NL_MONETARY_DUO_INT_P_SIGN_POSN)); ++ TEST_COMPARE (byte 
(_NL_MONETARY_DUO_INT_N_SIGN_POSN), ++ byte_utf8 (_NL_MONETARY_DUO_INT_N_SIGN_POSN)); ++ TEST_COMPARE (byte (_NL_MONETARY_DUO_P_SIGN_POSN), ++ byte_utf8 (_NL_MONETARY_DUO_P_SIGN_POSN)); ++ TEST_COMPARE (byte (_NL_MONETARY_DUO_N_SIGN_POSN), ++ byte_utf8 (_NL_MONETARY_DUO_N_SIGN_POSN)); ++ TEST_COMPARE (byte (_NL_MONETARY_DUO_INT_P_SIGN_POSN), ++ byte_utf8 (_NL_MONETARY_DUO_INT_P_SIGN_POSN)); ++ TEST_COMPARE (byte (_NL_MONETARY_DUO_INT_N_SIGN_POSN), ++ byte_utf8 (_NL_MONETARY_DUO_INT_N_SIGN_POSN)); ++ TEST_COMPARE (word (_NL_MONETARY_UNO_VALID_FROM), ++ word_utf8 (_NL_MONETARY_UNO_VALID_FROM)); ++ TEST_COMPARE (word (_NL_MONETARY_UNO_VALID_TO), ++ word_utf8 (_NL_MONETARY_UNO_VALID_TO)); ++ TEST_COMPARE (word (_NL_MONETARY_DUO_VALID_FROM), ++ word_utf8 (_NL_MONETARY_DUO_VALID_FROM)); ++ TEST_COMPARE (word (_NL_MONETARY_DUO_VALID_TO), ++ word_utf8 (_NL_MONETARY_DUO_VALID_TO)); ++ /* _NL_MONETARY_CONVERSION_RATE cannot be tested (word array). */ ++ TEST_COMPARE (word (_NL_MONETARY_DECIMAL_POINT_WC), ++ word_utf8 (_NL_MONETARY_DECIMAL_POINT_WC)); ++ TEST_COMPARE (word (_NL_MONETARY_THOUSANDS_SEP_WC), ++ word_utf8 (_NL_MONETARY_THOUSANDS_SEP_WC)); ++ /* Expected difference. */ ++ TEST_COMPARE_STRING (str (_NL_MONETARY_CODESET), "ANSI_X3.4-1968"); ++ TEST_COMPARE_STRING (str_utf8 (_NL_MONETARY_CODESET), "UTF-8"); ++ ++ /* LC_NUMERIC. */ ++ ++ TEST_COMPARE_STRING (str (DECIMAL_POINT), str_utf8 (DECIMAL_POINT)); ++ TEST_COMPARE_STRING (str (RADIXCHAR), str_utf8 (RADIXCHAR)); ++ TEST_COMPARE_STRING (str (THOUSANDS_SEP), str_utf8 (THOUSANDS_SEP)); ++ TEST_COMPARE_STRING (str (THOUSEP), str_utf8 (THOUSEP)); ++ TEST_COMPARE_STRING (str (GROUPING), str_utf8 (GROUPING)); ++ TEST_COMPARE (word (_NL_NUMERIC_DECIMAL_POINT_WC), ++ word_utf8 (_NL_NUMERIC_DECIMAL_POINT_WC)); ++ TEST_COMPARE (word (_NL_NUMERIC_THOUSANDS_SEP_WC), ++ word_utf8 (_NL_NUMERIC_THOUSANDS_SEP_WC)); ++ /* Expected difference. 
*/ ++ TEST_COMPARE_STRING (str (_NL_NUMERIC_CODESET), "ANSI_X3.4-1968"); ++ TEST_COMPARE_STRING (str_utf8 (_NL_NUMERIC_CODESET), "UTF-8"); ++ ++ /* LC_MESSAGES. */ ++ ++ TEST_COMPARE_STRING (str (YESEXPR), str_utf8 (YESEXPR)); ++ TEST_COMPARE_STRING (str (NOEXPR), str_utf8 (NOEXPR)); ++ TEST_COMPARE_STRING (str (YESSTR), str_utf8 (YESSTR)); ++ TEST_COMPARE_STRING (str (NOSTR), str_utf8 (NOSTR)); ++ /* Expected difference. */ ++ TEST_COMPARE_STRING (str (_NL_MESSAGES_CODESET), "ANSI_X3.4-1968"); ++ TEST_COMPARE_STRING (str_utf8 (_NL_MESSAGES_CODESET), "UTF-8"); ++ ++ /* LC_PAPER. */ ++ ++ TEST_COMPARE (word (_NL_PAPER_HEIGHT), word_utf8 (_NL_PAPER_HEIGHT)); ++ TEST_COMPARE (word (_NL_PAPER_WIDTH), word_utf8 (_NL_PAPER_WIDTH)); ++ /* Expected difference. */ ++ TEST_COMPARE_STRING (str (_NL_PAPER_CODESET), "ANSI_X3.4-1968"); ++ TEST_COMPARE_STRING (str_utf8 (_NL_PAPER_CODESET), "UTF-8"); ++ ++ /* LC_NAME. */ ++ ++ TEST_COMPARE_STRING (str (_NL_NAME_NAME_FMT), ++ str_utf8 (_NL_NAME_NAME_FMT)); ++ TEST_COMPARE_STRING (str (_NL_NAME_NAME_GEN), ++ str_utf8 (_NL_NAME_NAME_GEN)); ++ TEST_COMPARE_STRING (str (_NL_NAME_NAME_MR), ++ str_utf8 (_NL_NAME_NAME_MR)); ++ TEST_COMPARE_STRING (str (_NL_NAME_NAME_MRS), ++ str_utf8 (_NL_NAME_NAME_MRS)); ++ TEST_COMPARE_STRING (str (_NL_NAME_NAME_MISS), ++ str_utf8 (_NL_NAME_NAME_MISS)); ++ TEST_COMPARE_STRING (str (_NL_NAME_NAME_MS), ++ str_utf8 (_NL_NAME_NAME_MS)); ++ /* Expected difference. */ ++ TEST_COMPARE_STRING (str (_NL_NAME_CODESET), "ANSI_X3.4-1968"); ++ TEST_COMPARE_STRING (str_utf8 (_NL_NAME_CODESET), "UTF-8"); ++ ++ /* LC_ADDRESS. 
*/ ++ ++ TEST_COMPARE_STRING (str (_NL_ADDRESS_POSTAL_FMT), ++ str_utf8 (_NL_ADDRESS_POSTAL_FMT)); ++ TEST_COMPARE_STRING (str (_NL_ADDRESS_COUNTRY_NAME), ++ str_utf8 (_NL_ADDRESS_COUNTRY_NAME)); ++ TEST_COMPARE_STRING (str (_NL_ADDRESS_COUNTRY_POST), ++ str_utf8 (_NL_ADDRESS_COUNTRY_POST)); ++ TEST_COMPARE_STRING (str (_NL_ADDRESS_COUNTRY_AB2), ++ str_utf8 (_NL_ADDRESS_COUNTRY_AB2)); ++ TEST_COMPARE_STRING (str (_NL_ADDRESS_COUNTRY_AB3), ++ str_utf8 (_NL_ADDRESS_COUNTRY_AB3)); ++ TEST_COMPARE_STRING (str (_NL_ADDRESS_COUNTRY_CAR), ++ str_utf8 (_NL_ADDRESS_COUNTRY_CAR)); ++ TEST_COMPARE (word (_NL_ADDRESS_COUNTRY_NUM), ++ word_utf8 (_NL_ADDRESS_COUNTRY_NUM)); ++ TEST_COMPARE_STRING (str (_NL_ADDRESS_COUNTRY_ISBN), ++ str_utf8 (_NL_ADDRESS_COUNTRY_ISBN)); ++ TEST_COMPARE_STRING (str (_NL_ADDRESS_LANG_NAME), ++ str_utf8 (_NL_ADDRESS_LANG_NAME)); ++ TEST_COMPARE_STRING (str (_NL_ADDRESS_LANG_AB), ++ str_utf8 (_NL_ADDRESS_LANG_AB)); ++ TEST_COMPARE_STRING (str (_NL_ADDRESS_LANG_TERM), ++ str_utf8 (_NL_ADDRESS_LANG_TERM)); ++ TEST_COMPARE_STRING (str (_NL_ADDRESS_LANG_LIB), ++ str_utf8 (_NL_ADDRESS_LANG_LIB)); ++ /* Expected difference. */ ++ TEST_COMPARE_STRING (str (_NL_ADDRESS_CODESET), "ANSI_X3.4-1968"); ++ TEST_COMPARE_STRING (str_utf8 (_NL_ADDRESS_CODESET), "UTF-8"); ++ ++ /* LC_TELEPHONE. */ ++ ++ TEST_COMPARE_STRING (str (_NL_TELEPHONE_TEL_INT_FMT), ++ str_utf8 (_NL_TELEPHONE_TEL_INT_FMT)); ++ TEST_COMPARE_STRING (str (_NL_TELEPHONE_TEL_DOM_FMT), ++ str_utf8 (_NL_TELEPHONE_TEL_DOM_FMT)); ++ TEST_COMPARE_STRING (str (_NL_TELEPHONE_INT_SELECT), ++ str_utf8 (_NL_TELEPHONE_INT_SELECT)); ++ TEST_COMPARE_STRING (str (_NL_TELEPHONE_INT_PREFIX), ++ str_utf8 (_NL_TELEPHONE_INT_PREFIX)); ++ /* Expected difference. */ ++ TEST_COMPARE_STRING (str (_NL_TELEPHONE_CODESET), "ANSI_X3.4-1968"); ++ TEST_COMPARE_STRING (str_utf8 (_NL_TELEPHONE_CODESET), "UTF-8"); ++ ++ /* LC_MEASUREMENT. 
*/ ++ ++ TEST_COMPARE (byte (_NL_MEASUREMENT_MEASUREMENT), ++ byte_utf8 (_NL_MEASUREMENT_MEASUREMENT)); ++ /* Expected difference. */ ++ TEST_COMPARE_STRING (str (_NL_MEASUREMENT_CODESET), "ANSI_X3.4-1968"); ++ TEST_COMPARE_STRING (str_utf8 (_NL_MEASUREMENT_CODESET), "UTF-8"); ++ ++ /* LC_IDENTIFICATION is skipped since C.UTF-8 is distinct from C. */ ++ ++ /* _NL_IDENTIFICATION_CATEGORY cannot be tested because it is a ++ string array. */ ++ /* Expected difference. */ ++ TEST_COMPARE_STRING (str (_NL_IDENTIFICATION_CODESET), "ANSI_X3.4-1968"); ++ TEST_COMPARE_STRING (str_utf8 (_NL_IDENTIFICATION_CODESET), "UTF-8"); ++} ++ ++static int ++do_test (void) ++{ ++ puts ("info: using setlocale and nl_langinfo"); ++ one_pass (); ++ ++ puts ("info: using nl_langinfo_l"); ++ ++ c_utf8 = newlocale (LC_ALL_MASK, "C.UTF-8", (locale_t) 0); ++ TEST_VERIFY_EXIT (c_utf8 != (locale_t) 0); ++ ++ switch_to_c (); ++ use_nl_langinfo_l = true; ++ one_pass (); ++ ++ freelocale (c_utf8); ++ ++ return 0; ++} ++ ++#include <support/test-driver.c> +-- +2.28.0.windows.1 + diff --git a/backport-localedef-Fix-handling-of-empty-mon_decimal_point-Bu.patch b/backport-localedef-Fix-handling-of-empty-mon_decimal_point-Bu.patch new file mode 100644 index 0000000000000000000000000000000000000000..bb03288203ae937cf56fb0132252a2ad0a22a6e3 --- /dev/null +++ b/backport-localedef-Fix-handling-of-empty-mon_decimal_point-Bu.patch @@ -0,0 +1,74 @@ +From 1d8e3a2c6636cf0b1b8fa2f869cef6ec10726933 Mon Sep 17 00:00:00 2001 +From: Carlos O'Donell +Date: Mon, 31 Jan 2022 00:34:41 -0500 +Subject: [PATCH] localedef: Fix handling of empty mon_decimal_point (Bug + 28847) + +The handling of mon_decimal_point is incorrect when it comes to +handling the empty "" value. The existing parser in monetary_read() +will correctly handle setting the non-wide-character value and the +wide-character value e.g. STR_ELEM_WC(mon_decimal_point) if they are +set in the locale definition.
However, in monetary_finish() we have +conflicting TEST_ELEM() which sets a default value (if the locale +definition doesn't include one), and subsequent code which looks for +mon_decimal_point to be NULL to issue a specific error message and set +the defaults. The latter is unused because TEST_ELEM() always sets a +default. The simplest solution is to remove the TEST_ELEM() check, +and allow the existing check to look to see if mon_decimal_point is +NULL and set an appropriate default. The final fix is to move the +setting of mon_decimal_point_wc so it occurs only when +mon_decimal_point is being set to a default, keeping both values +consistent. There is no way to tell the difference between +mon_decimal_point_wc having been set to the empty string and not +having been defined at all, for that distinction we must use +mon_decimal_point being NULL or "", and so we must logically set +the default together with mon_decimal_point. + +Lastly, there are more fixes similar to this that could be made to +ld-monetary.c, but we avoid that in order to fix just the code +required for mon_decimal_point, which impacts the ability for C.UTF-8 +to set mon_decimal_point to "", since without this fix we end up with +an inconsistent setting of mon_decimal_point set to "", but +mon_decimal_point_wc set to "." which is incorrect. + +Tested on x86_64 and i686 without regression. 
+Reviewed-by: Florian Weimer + +Conflict:NA +Reference:https://sourceware.org/git/?p=glibc.git;a=commit;h=1d8e3a2c6636cf0b1b8fa2f869cef6ec10726933 +--- + locale/programs/ld-monetary.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/locale/programs/ld-monetary.c b/locale/programs/ld-monetary.c +index 277b9ff042..3b0412b405 100644 +--- a/locale/programs/ld-monetary.c ++++ b/locale/programs/ld-monetary.c +@@ -207,7 +207,6 @@ No definition for %s category found"), "LC_MONETARY"); + + TEST_ELEM (int_curr_symbol, ""); + TEST_ELEM (currency_symbol, ""); +- TEST_ELEM (mon_decimal_point, "."); + TEST_ELEM (mon_thousands_sep, ""); + TEST_ELEM (positive_sign, ""); + TEST_ELEM (negative_sign, ""); +@@ -257,6 +256,7 @@ not correspond to a valid name in ISO 4217 [--no-warnings=intcurrsym]"), + record_error (0, 0, _("%s: field `%s' not defined"), + "LC_MONETARY", "mon_decimal_point"); + monetary->mon_decimal_point = "."; ++ monetary->mon_decimal_point_wc = L'.'; + } + else if (monetary->mon_decimal_point[0] == '\0' && ! be_quiet && ! 
nothing) + { +@@ -264,8 +264,6 @@ not correspond to a valid name in ISO 4217 [--no-warnings=intcurrsym]"), + %s: value for field `%s' must not be an empty string"), + "LC_MONETARY", "mon_decimal_point"); + } +- if (monetary->mon_decimal_point_wc == L'\0') +- monetary->mon_decimal_point_wc = L'.'; + + if (monetary->mon_grouping_len == 0) + { +-- +2.28.0.windows.1 + diff --git a/glibc.spec b/glibc.spec index 5ba3f846dc9981b8b2753f6841a25494606ac15e..a43967f46b26bd96a7c04a8056a2be72e7b9e02b 100644 --- a/glibc.spec +++ b/glibc.spec @@ -66,7 +66,7 @@ ############################################################################## Name: glibc Version: 2.34 -Release: 119 +Release: 120 Summary: The GNU libc libraries License: %{all_license} URL: http://www.gnu.org/software/glibc/ @@ -84,8 +84,7 @@ Source7: replace_same_file_to_hard_link.py Source8: testsuite_whitelist %endif -Patch0: glibc-1070416.patch -Patch1: glibc-c-utf8-locale.patch +Patch1: glibc-1070416.patch Patch2: backport-CVE-2021-38604-0001-librt-add-test-bug-28213.patch Patch3: backport-CVE-2021-38604-0002-librt-fix-NULL-pointer-dereference-bug-28213.patch Patch4: copy_and_spawn_sgid-Avoid-double-calls-to-close.patch @@ -260,6 +259,11 @@ Patch172: riscv-align-stack-in-clone-BZ-28702.patch Patch173: stdlib-strfrom-Add-copysign-to-fix-NAN-issue-on-risc.patch Patch174: Assume-only-FLAG_ELF_LIBC6-suport.patch Patch175: elf-Restore-ldconfig-libc6-implicit-soname-logic-BZ-.patch +Patch176: backport-Add-codepoint_collation-support-for-LC_COLLATE.patch +Patch177: backport-Add-generic-C.UTF-8-locale-Bug-17318.patch +Patch178: backport-localedef-Fix-handling-of-empty-mon_decimal_point-Bu.patch +Patch179: backport-Add-TEST_COMPARE_STRING_WIDE-to-support-check.h.patch +Patch180: backport-localedata-Adjust-C.UTF-8-to-align-with-C-POSIX.patch Patch9000: turn-default-value-of-x86_rep_stosb_threshold_form_2K_to_1M.patch Patch9001: delete-no-hard-link-to-avoid-all_language-package-to.patch @@ -1467,6 +1471,9 @@ fi %endif 
%changelog +* Fri May 19 2023 Qingqing Li - 2.34-120 +- locale: reduce the size of locale C.UTF-8 + * Mon May 08 2023 laokz - 2.34-119 - Backport RISC-V patches: - Align stack in clone (from v2.35)