diff --git a/openEuler-coreutils-df-direct.patch b/backport-coreutils-df-direct.patch similarity index 74% rename from openEuler-coreutils-df-direct.patch rename to backport-coreutils-df-direct.patch index 9bce2c5cf382527d9ed10d4b6214b38041b6508e..9e3434aeabec5735c3d3c048ff44a41584046c83 100644 --- a/openEuler-coreutils-df-direct.patch +++ b/backport-coreutils-df-direct.patch @@ -1,22 +1,20 @@ -From f11a739f6aabbf280fa68a8013974de7d0855ecd Mon Sep 17 00:00:00 2001 -From: xueyamao -Date: Wed, 20 Jul 2022 17:49:23 +0800 -Subject: [PATCH 2/2] coreutils-df-direct add df --direct option,direct statfs - for a file. Do not resolve mount point and show statistics directly for a - file. Signed-off-by: xueyamao +From 6e36198f10a2f63b89c89ebb5d5c185b20fb3a63 Mon Sep 17 00:00:00 2001 +From: Kamil Dudka +Date: Mon, 29 Mar 2010 17:20:34 +0000 +Subject: [PATCH] coreutils-df-direct.patch --- doc/coreutils.texi | 7 ++++++ - src/df.c | 38 ++++++++++++++++++++++++++++--- - tests/df/direct.sh | 56 ++++++++++++++++++++++++++++++++++++++++++++++ - 3 files changed, 98 insertions(+), 3 deletions(-) - create mode 100644 tests/df/direct.sh + src/df.c | 34 ++++++++++++++++++++++++++-- + tests/df/direct.sh | 55 ++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 94 insertions(+), 2 deletions(-) + create mode 100755 tests/df/direct.sh diff --git a/doc/coreutils.texi b/doc/coreutils.texi -index cb00eea..7667ace 100644 +index 5b9a597..6810c15 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi -@@ -12067,6 +12067,13 @@ some systems (notably Solaris), doing this yields more up to date results, +@@ -12074,6 +12074,13 @@ some systems (notably Solaris), doing this yields more up to date results, but in general this option makes @command{df} much slower, especially when there are many or very busy file systems. @@ -31,7 +29,7 @@ index cb00eea..7667ace 100644 @opindex --total @cindex grand total of file system size, usage and available space diff --git a/src/df.c b/src/df.c -index 6256d0a..1c5fe5f 100644 +index 48025b9..c8efa5b 100644 --- a/src/df.c +++ b/src/df.c @@ -125,6 +125,9 @@ static bool print_type; @@ -55,16 +53,16 @@ index 6256d0a..1c5fe5f 100644 static struct option const long_options[] = { - {"all", no_argument, NULL, 'a'}, - {"block-size", required_argument, NULL, 'B'}, -+ {"direct",no_argument,NULL,DIRECT_OPTION}, - {"inodes", no_argument, NULL, 'i'}, - {"human-readable", no_argument, NULL, 'h'}, - {"si", no_argument, NULL, 'H'}, + {"all", no_argument, nullptr, 'a'}, + {"block-size", required_argument, nullptr, 'B'}, ++ {"direct", no_argument, nullptr, DIRECT_OPTION}, + {"inodes", no_argument, nullptr, 'i'}, + {"human-readable", no_argument, nullptr, 'h'}, + {"si", no_argument, nullptr, 'H'}, @@ -583,7 +588,10 @@ get_header (void) for (col = 0; col < ncolumns; col++) { - char *cell = NULL; + char *cell = nullptr; - char const *header = _(columns[col]->caption); + char const *header = (columns[col]->field == TARGET_FIELD + && direct_statfs)? @@ -73,29 +71,25 @@ index 6256d0a..1c5fe5f 100644 if (columns[col]->field == SIZE_FIELD && (header_mode == DEFAULT_MODE -@@ -1480,7 +1488,20 @@ get_point (char const *point, const struct stat *statp) +@@ -1486,6 +1494,17 @@ get_point (char const *point, const struct stat *statp) static void get_entry (char const *name, struct stat const *statp) { -- if ((S_ISBLK (statp->st_mode) || S_ISCHR (statp->st_mode)) -+ if (direct_statfs) ++ if (direct_statfs) + { + char *resolved = canonicalize_file_name (name); + if (resolved) -+ { -+ char *mp = find_mount_point (name, statp); -+ get_dev (NULL, mp, resolved, NULL, NULL, false, false, NULL, false); -+ free(mp); -+ free (resolved); -+ return; -+ } ++ { ++ get_dev (NULL, resolved, name, NULL, NULL, false, false, NULL, false); ++ free (resolved); ++ return; ++ } + } -+ -+ if ((S_ISBLK (statp->st_mode) || S_ISCHR (statp->st_mode)) ++ + if ((S_ISBLK (statp->st_mode) || S_ISCHR (statp->st_mode)) && get_device (name)) return; - -@@ -1550,6 +1571,7 @@ or all file systems by default.\n\ +@@ -1556,6 +1575,7 @@ or all file systems by default.\n\ -B, --block-size=SIZE scale sizes by SIZE before printing them; e.g.,\n\ '-BM' prints sizes in units of 1,048,576 bytes;\n\ see SIZE format below\n\ @@ -103,24 +97,24 @@ index 6256d0a..1c5fe5f 100644 -h, --human-readable print sizes in powers of 1024 (e.g., 1023M)\n\ -H, --si print sizes in powers of 1000 (e.g., 1.1G)\n\ "), stdout); -@@ -1640,6 +1662,9 @@ main (int argc, char **argv) +@@ -1646,6 +1666,9 @@ main (int argc, char **argv) xstrtol_fatal (e, oi, c, long_options, optarg); } break; -+ case DIRECT_OPTION: ++ case DIRECT_OPTION: + direct_statfs = true; + break; case 'i': if (header_mode == OUTPUT_MODE) { -@@ -1736,6 +1761,13 @@ main (int argc, char **argv) +@@ -1742,6 +1765,13 @@ main (int argc, char **argv) } } + if (direct_statfs && show_local_fs) + { + error (0, 0, _("options --direct and --local (-l) are mutually " -+ "exclusive")); ++ "exclusive")); + usage (EXIT_FAILURE); + } + @@ -128,11 +122,11 @@ index 6256d0a..1c5fe5f 100644 { if (posix_format) diff --git a/tests/df/direct.sh b/tests/df/direct.sh -new file mode 100644 -index 0000000..25fbc57 +new file mode 100755 +index 0000000..8e4cfb8 --- /dev/null +++ b/tests/df/direct.sh -@@ -0,0 +1,56 @@ +@@ -0,0 +1,55 @@ +#!/bin/sh +# Ensure "df --direct" works as documented + @@ -188,7 +182,6 @@ index 0000000..25fbc57 +compare file_out file_exp || fail=1 + +Exit $fail -+ -- -2.33.0 +2.31.1 diff --git a/backport-coreutils-i18n.patch b/backport-coreutils-i18n.patch index 8492fe6acbba139ec5fb5670681126b4df72f1b5..be8e0b178e6a20836b7fcdf1bbfe5bd672ca81a4 100644 --- a/backport-coreutils-i18n.patch +++ b/backport-coreutils-i18n.patch @@ -1,14 +1,14 @@ -From 01010419a6499768563e7b2f3fd56cf16edda75e Mon Sep 17 00:00:00 2001 +From 3a1b92e80708319bcc89852e3da1029c3d1ff6b3 Mon Sep 17 00:00:00 2001 From: rpm-build -Date: Mon, 4 Oct 2021 08:54:37 +0200 +Date: Wed, 30 Aug 2023 17:19:58 +0200 Subject: [PATCH] coreutils-i18n.patch --- bootstrap.conf | 1 + - configure.ac | 2 + + configure.ac | 6 + lib/linebuffer.h | 8 + - lib/mbfile.c | 3 + - lib/mbfile.h | 255 ++++++++++++ + lib/mbfile.c | 20 + + lib/mbfile.h | 267 ++++++++++++ m4/mbfile.m4 | 14 + src/cut.c | 508 +++++++++++++++++++++-- src/expand-common.c | 114 ++++++ @@ -29,13 +29,13 @@ Subject: [PATCH] coreutils-i18n.patch tests/misc/fold.pl | 50 ++- tests/misc/join.pl | 50 +++ tests/misc/sort-mb-tests.sh | 45 ++ - tests/misc/sort-merge.pl | 42 ++ - tests/misc/sort.pl | 40 +- tests/misc/unexpand.pl | 39 ++ - tests/misc/uniq.pl | 55 +++ tests/pr/pr-tests.pl | 49 +++ + tests/sort/sort-merge.pl | 42 ++ + tests/sort/sort.pl | 40 +- tests/unexpand/mb.sh | 172 ++++++++ - 31 files changed, 3699 insertions(+), 242 deletions(-) + tests/uniq/uniq.pl | 55 +++ + 31 files changed, 3732 insertions(+), 242 deletions(-) create mode 100644 lib/mbfile.c create mode 100644 lib/mbfile.h create mode 100644 m4/mbfile.m4 @@ -45,10 +45,10 @@ Subject: [PATCH] coreutils-i18n.patch create mode 100755 tests/unexpand/mb.sh diff --git a/bootstrap.conf b/bootstrap.conf -index c1399e3..60b39cf 100644 +index bd73ff2..0e450cd 100644 --- a/bootstrap.conf +++ b/bootstrap.conf -@@ -165,6 +165,7 @@ gnulib_modules=" +@@ -167,6 +167,7 @@ gnulib_modules=" maintainer-makefile malloc-gnu manywarnings @@ -57,20 +57,24 @@ index c1399e3..60b39cf 100644 mbrtowc mbsalign diff --git a/configure.ac b/configure.ac -index 7e4afc9..4656a35 100644 +index 8ffc0b7..ca3305d 100644 --- a/configure.ac +++ b/configure.ac -@@ -477,6 +477,8 @@ fi +@@ -448,6 +448,12 @@ fi # I'm leaving it here for now. This whole thing needs to be modernized... gl_WINSIZE_IN_PTEM +gl_MBFILE ++dnl Do not use gl_MODULE_INDICATOR([mbfile]) here: we don't want 'struct mbchar' ++dnl to have a different size in lib/ than in tests/. ++AC_DEFINE([GNULIB_MBFILE], [1], ++ [Define to 1 if the gnulib module 'mbfile' is in use.]) + gl_HEADER_TIOCGWINSZ_IN_TERMIOS_H if test $gl_cv_sys_tiocgwinsz_needs_termios_h = no && \ diff --git a/lib/linebuffer.h b/lib/linebuffer.h -index 07d45ca..af62e6c 100644 +index b4cc8e4..f2bbb52 100644 --- a/lib/linebuffer.h +++ b/lib/linebuffer.h @@ -22,6 +22,11 @@ @@ -97,34 +101,51 @@ index 07d45ca..af62e6c 100644 /* Initialize linebuffer LINEBUFFER for use. */ diff --git a/lib/mbfile.c b/lib/mbfile.c new file mode 100644 -index 0000000..b0a468e +index 0000000..8d2957b --- /dev/null +++ b/lib/mbfile.c -@@ -0,0 +1,3 @@ +@@ -0,0 +1,20 @@ ++/* Multibyte character I/O: macros for multi-byte encodings. ++ Copyright (C) 2012-2023 Free Software Foundation, Inc. ++ ++ This file is free software: you can redistribute it and/or modify ++ it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation, either version 3 of the ++ License, or (at your option) any later version. ++ ++ This file is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public License ++ along with this program. If not, see . */ ++ +#include ++ +#define MBFILE_INLINE _GL_EXTERN_INLINE +#include "mbfile.h" diff --git a/lib/mbfile.h b/lib/mbfile.h new file mode 100644 -index 0000000..11f1b12 +index 0000000..ad61c19 --- /dev/null +++ b/lib/mbfile.h -@@ -0,0 +1,255 @@ +@@ -0,0 +1,267 @@ +/* Multibyte character I/O: macros for multi-byte encodings. -+ Copyright (C) 2001, 2005, 2009-2015 Free Software Foundation, Inc. ++ Copyright (C) 2001, 2005, 2009-2023 Free Software Foundation, Inc. + -+ This program is free software: you can redistribute it and/or modify -+ it under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3 of the License, or -+ (at your option) any later version. ++ This file is free software: you can redistribute it and/or modify ++ it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation, either version 3 of the ++ License, or (at your option) any later version. + -+ This program is distributed in the hope that it will be useful, ++ This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ GNU General Public License for more details. ++ GNU Lesser General Public License for more details. + -+ You should have received a copy of the GNU General Public License -+ along with this program. If not, see . */ ++ You should have received a copy of the GNU Lesser General Public License ++ along with this program. If not, see . */ + +/* Written by Mitsuru Chinen + and Bruno Haible . */ @@ -159,24 +180,18 @@ index 0000000..11f1b12 +#ifndef _MBFILE_H +#define _MBFILE_H 1 + ++/* This file uses _GL_INLINE_HEADER_BEGIN, _GL_INLINE. */ ++#if !_GL_CONFIG_H_INCLUDED ++ #error "Please include config.h first." ++#endif ++ +#include -+#include +#include +#include -+ -+/* Tru64 with Desktop Toolkit C has a bug: must be included before -+ . -+ BSD/OS 4.1 has a bug: and must be included before -+ . */ -+#include -+#include +#include + +#include "mbchar.h" + -+#ifndef _GL_INLINE_HEADER_BEGIN -+ #error "Please include config.h first." -+#endif +_GL_INLINE_HEADER_BEGIN +#ifndef MBFILE_INLINE +# define MBFILE_INLINE _GL_INLINE @@ -195,6 +210,7 @@ index 0000000..11f1b12 +MBFILE_INLINE void +mbfile_multi_getc (struct mbchar *mbc, struct mbfile_multi *mbf) +{ ++ unsigned int new_bufcount; + size_t bytes; + + /* If EOF has already been seen, don't use getc. This matters if @@ -210,64 +226,70 @@ index 0000000..11f1b12 + return; + } + -+ /* Before using mbrtowc, we need at least one byte. */ -+ if (mbf->bufcount == 0) ++ new_bufcount = mbf->bufcount; ++ ++ /* If mbf->state is not in an initial state, some more 32-bit wide character ++ may be hiding in the state. We need to call mbrtoc32 again. */ ++ #if GNULIB_MBRTOC32_REGULAR ++ assert (mbsinit (&mbf->state)); ++ #else ++ if (mbsinit (&mbf->state)) ++ #endif + { -+ int c = getc (mbf->fp); -+ if (c == EOF) ++ /* Before using mbrtoc32, we need at least one byte. */ ++ if (new_bufcount == 0) + { -+ mbf->eof_seen = true; -+ goto eof; ++ int c = getc (mbf->fp); ++ if (c == EOF) ++ { ++ mbf->eof_seen = true; ++ goto eof; ++ } ++ mbf->buf[0] = (unsigned char) c; ++ new_bufcount++; + } -+ mbf->buf[0] = (unsigned char) c; -+ mbf->bufcount++; -+ } + -+ /* Handle most ASCII characters quickly, without calling mbrtowc(). */ -+ if (mbf->bufcount == 1 && mbsinit (&mbf->state) && is_basic (mbf->buf[0])) -+ { -+ /* These characters are part of the basic character set. ISO C 99 -+ guarantees that their wide character code is identical to their -+ char code. */ -+ mbc->wc = mbc->buf[0] = mbf->buf[0]; -+ mbc->wc_valid = true; -+ mbc->ptr = &mbc->buf[0]; -+ mbc->bytes = 1; -+ mbf->bufcount = 0; -+ return; ++ /* Handle most ASCII characters quickly, without calling mbrtoc32(). */ ++ if (new_bufcount == 1 && is_basic (mbf->buf[0])) ++ { ++ /* These characters are part of the POSIX portable character set. ++ For most of them, namely those in the ISO C basic character set, ++ ISO C 99 guarantees that their wide character code is identical to ++ their char code. For the few other ones, this is the case as well, ++ in all locale encodings that are in use. The 32-bit wide character ++ code is the same as well. */ ++ mbc->wc = mbc->buf[0] = mbf->buf[0]; ++ mbc->wc_valid = true; ++ mbc->ptr = &mbc->buf[0]; ++ mbc->bytes = 1; ++ mbf->bufcount = 0; ++ return; ++ } + } + -+ /* Use mbrtowc on an increasing number of bytes. Read only as many bytes ++ /* Use mbrtoc32 on an increasing number of bytes. Read only as many bytes + from mbf->fp as needed. This is needed to give reasonable interactive + behaviour when mbf->fp is connected to an interactive tty. */ + for (;;) + { -+ /* We don't know whether the 'mbrtowc' function updates the state when -+ it returns -2, - this is the ISO C 99 and glibc-2.2 behaviour - or -+ not - amended ANSI C, glibc-2.1 and Solaris 2.7 behaviour. We -+ don't have an autoconf test for this, yet. -+ The new behaviour would allow us to feed the bytes one by one into -+ mbrtowc. But the old behaviour forces us to feed all bytes since -+ the end of the last character into mbrtowc. Since we want to retry -+ with more bytes when mbrtowc returns -2, we must backup the state -+ before calling mbrtowc, because implementations with the new -+ behaviour will clobber it. */ -+ mbstate_t backup_state = mbf->state; -+ -+ bytes = mbrtowc (&mbc->wc, &mbf->buf[0], mbf->bufcount, &mbf->state); ++ /* Feed the bytes one by one into mbrtoc32. */ ++ bytes = mbrtoc32 (&mbc->wc, &mbf->buf[mbf->bufcount], new_bufcount - mbf->bufcount, &mbf->state); + + if (bytes == (size_t) -1) + { + /* An invalid multibyte sequence was encountered. */ ++ mbf->bufcount = new_bufcount; + /* Return a single byte. */ + bytes = 1; + mbc->wc_valid = false; ++ /* Allow the next invocation to continue from a sane state. */ ++ mbszero (&mbf->state); + break; + } + else if (bytes == (size_t) -2) + { + /* An incomplete multibyte character. */ -+ mbf->state = backup_state; ++ mbf->bufcount = new_bufcount; + if (mbf->bufcount == MBCHAR_BUF_SIZE) + { + /* An overlong incomplete multibyte sequence was encountered. */ @@ -278,28 +300,42 @@ index 0000000..11f1b12 + } + else + { -+ /* Read one more byte and retry mbrtowc. */ ++ /* Read one more byte and retry mbrtoc32. */ + int c = getc (mbf->fp); + if (c == EOF) + { + /* An incomplete multibyte character at the end. */ + mbf->eof_seen = true; -+ bytes = mbf->bufcount; ++ bytes = new_bufcount; + mbc->wc_valid = false; + break; + } -+ mbf->buf[mbf->bufcount] = (unsigned char) c; -+ mbf->bufcount++; ++ mbf->buf[new_bufcount] = (unsigned char) c; ++ new_bufcount++; + } + } + else + { -+ if (bytes == 0) ++ #if !GNULIB_MBRTOC32_REGULAR ++ if (bytes == (size_t) -3) + { -+ /* A null wide character was encountered. */ -+ bytes = 1; -+ assert (mbf->buf[0] == '\0'); -+ assert (mbc->wc == 0); ++ /* The previous multibyte sequence produced an additional 32-bit ++ wide character. */ ++ mbf->bufcount = new_bufcount; ++ bytes = 0; ++ } ++ else ++ #endif ++ { ++ bytes = mbf->bufcount + bytes; ++ mbf->bufcount = new_bufcount; ++ if (bytes == 0) ++ { ++ /* A null 32-bit wide character was encountered. */ ++ bytes = 1; ++ assert (mbf->buf[0] == '\0'); ++ assert (mbc->wc == 0); ++ } + } + mbc->wc_valid = true; + break; @@ -350,7 +386,7 @@ index 0000000..11f1b12 + ((mbf).fp = (stream), \ + (mbf).eof_seen = false, \ + (mbf).have_pushback = false, \ -+ memset (&(mbf).state, '\0', sizeof (mbstate_t)), \ ++ mbszero (&(mbf).state), \ + (mbf).bufcount = 0) + +#define mbf_getc(mbc, mbf) mbfile_multi_getc (&(mbc), &(mbf)) @@ -359,20 +395,17 @@ index 0000000..11f1b12 + +#define mb_iseof(mbc) ((mbc).bytes == 0) + -+#ifndef _GL_INLINE_HEADER_BEGIN -+ #error "Please include config.h first." -+#endif -+_GL_INLINE_HEADER_BEGIN ++_GL_INLINE_HEADER_END + +#endif /* _MBFILE_H */ diff --git a/m4/mbfile.m4 b/m4/mbfile.m4 new file mode 100644 -index 0000000..8589902 +index 0000000..83068a9 --- /dev/null +++ b/m4/mbfile.m4 @@ -0,0 +1,14 @@ +# mbfile.m4 serial 7 -+dnl Copyright (C) 2005, 2008-2015 Free Software Foundation, Inc. ++dnl Copyright (C) 2005, 2008-2023 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. @@ -386,11 +419,11 @@ index 0000000..8589902 + : +]) diff --git a/src/cut.c b/src/cut.c -index 6fd8978..faef877 100644 +index b4edbab..65e4658 100644 --- a/src/cut.c +++ b/src/cut.c -@@ -28,6 +28,11 @@ - #include +@@ -27,6 +27,11 @@ + #include #include #include + @@ -400,8 +433,8 @@ index 6fd8978..faef877 100644 +#endif #include "system.h" - #include "error.h" -@@ -36,6 +41,18 @@ + #include "assure.h" +@@ -35,6 +40,18 @@ #include "set-fields.h" @@ -420,7 +453,7 @@ index 6fd8978..faef877 100644 /* The official name of this program (e.g., no 'g' prefix). */ #define PROGRAM_NAME "cut" -@@ -52,6 +69,52 @@ +@@ -51,6 +68,52 @@ } \ while (0) @@ -473,8 +506,8 @@ index 6fd8978..faef877 100644 /* Pointer inside RP. When checking if a byte or field is selected by a finite range, we check if it is between CURRENT_RP.LO -@@ -59,6 +122,9 @@ - CURRENT_RP.HI then we make CURRENT_RP to point to the next range pair. */ +@@ -58,6 +121,9 @@ + CURRENT_RP.HI then we make CURRENT_RP to point to the next range pair. */ static struct field_range_pair *current_rp; +/* Length of the delimiter given as argument to -d. */ @@ -483,7 +516,7 @@ index 6fd8978..faef877 100644 /* This buffer is used to support the semantics of the -s option (or lack of same) when the specified field list includes (does not include) the first field. In both of those cases, the entire -@@ -71,6 +137,29 @@ static char *field_1_buffer; +@@ -70,6 +136,29 @@ static char *field_1_buffer; /* The number of bytes allocated for FIELD_1_BUFFER. */ static size_t field_1_bufsize; @@ -510,18 +543,18 @@ index 6fd8978..faef877 100644 + if this program runs on multibyte locale. */ +static int force_singlebyte_mode; + - /* If true do not output lines containing no delimiter characters. + /* If true, do not output lines containing no delimiter characters. Otherwise, all such lines are printed. This option is valid only with field mode. */ -@@ -82,10 +171,16 @@ static bool complement; +@@ -81,10 +170,16 @@ static bool complement; - /* The delimiter character for field mode. */ + /* The delimiter character for field mode. */ static unsigned char delim; +#if HAVE_WCHAR_H +static wchar_t wcdelim; +#endif - /* The delimiter for each line/record. */ + /* The delimiter for each line/record. */ static unsigned char line_delim = '\n'; +/* True if the --output-delimiter=STRING option was specified. */ @@ -530,17 +563,17 @@ index 6fd8978..faef877 100644 /* The length of output_delimiter_string. */ static size_t output_delimiter_length; -@@ -93,9 +188,6 @@ static size_t output_delimiter_length; +@@ -92,9 +187,6 @@ static size_t output_delimiter_length; string consisting of the input delimiter. */ static char *output_delimiter_string; -/* The output delimiter string contents, if the default. */ -static char output_delimiter_default[1]; - - /* True if we have ever read standard input. */ + /* True if we have ever read standard input. */ static bool have_read_stdin; -@@ -149,7 +241,7 @@ Print selected parts of lines from each FILE to standard output.\n\ +@@ -148,7 +240,7 @@ Print selected parts of lines from each FILE to standard output.\n\ -f, --fields=LIST select only these fields; also print any line\n\ that contains no delimiter character, unless\n\ the -s option is specified\n\ @@ -549,7 +582,7 @@ index 6fd8978..faef877 100644 "), stdout); fputs (_("\ --complement complement the set of selected bytes, characters\n\ -@@ -249,7 +341,7 @@ cut_bytes (FILE *stream) +@@ -252,7 +344,7 @@ cut_bytes (FILE *stream) next_item (&byte_idx); if (print_kth (byte_idx)) { @@ -558,7 +591,7 @@ index 6fd8978..faef877 100644 { if (print_delimiter && is_range_start_index (byte_idx)) { -@@ -265,6 +357,82 @@ cut_bytes (FILE *stream) +@@ -271,6 +363,82 @@ cut_bytes (FILE *stream) } } @@ -641,7 +674,7 @@ index 6fd8978..faef877 100644 /* Read from stream STREAM, printing to standard output any selected fields. */ static void -@@ -410,11 +578,218 @@ cut_fields (FILE *stream) +@@ -433,11 +601,218 @@ cut_fields (FILE *stream) } } @@ -862,18 +895,18 @@ index 6fd8978..faef877 100644 { FILE *stream; -@@ -458,8 +833,8 @@ main (int argc, char **argv) +@@ -482,8 +857,8 @@ main (int argc, char **argv) int optc; bool ok; bool delim_specified = false; - bool byte_mode = false; -- char *spec_list_string = NULL; -+ char *spec_list_string IF_LINT ( = NULL); +- char *spec_list_string = nullptr; ++ char *spec_list_string IF_LINT ( = nullptr); + char mbdelim[MB_LEN_MAX + 1]; initialize_main (&argc, &argv); set_program_name (argv[0]); -@@ -469,6 +844,8 @@ main (int argc, char **argv) +@@ -493,6 +868,8 @@ main (int argc, char **argv) atexit (close_stdout); @@ -882,12 +915,12 @@ index 6fd8978..faef877 100644 /* By default, all non-delimited lines are printed. */ suppress_non_delimited = false; -@@ -480,35 +857,77 @@ main (int argc, char **argv) +@@ -505,35 +882,77 @@ main (int argc, char **argv) switch (optc) { case 'b': - case 'c': - /* Build the byte list. */ + /* Build the byte list. */ - byte_mode = true; - FALLTHROUGH; + if (operating_mode != undefined_mode) @@ -897,7 +930,7 @@ index 6fd8978..faef877 100644 + break; + + case 'c': -+ /* Build the character list. */ ++ /* Build the character list. */ + if (operating_mode != undefined_mode) + FATAL_ERROR (_("only one type of list may be specified")); + operating_mode = character_mode; @@ -905,7 +938,7 @@ index 6fd8978..faef877 100644 + break; + case 'f': - /* Build the field list. */ + /* Build the field list. */ - if (spec_list_string) - FATAL_ERROR (_("only one list may be specified")); + if (operating_mode != undefined_mode) @@ -915,7 +948,7 @@ index 6fd8978..faef877 100644 break; case 'd': - /* New delimiter. */ + /* New delimiter. */ /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */ - if (optarg[0] != '\0' && optarg[1] != '\0') - FATAL_ERROR (_("the delimiter must be a single character")); @@ -970,7 +1003,7 @@ index 6fd8978..faef877 100644 break; case 's': -@@ -532,40 +951,57 @@ main (int argc, char **argv) +@@ -555,40 +974,57 @@ main (int argc, char **argv) } } @@ -1011,7 +1044,7 @@ index 6fd8978..faef877 100644 +#endif + } - if (output_delimiter_string == NULL) + if (output_delimiter_string == nullptr) { - output_delimiter_default[0] = delim; - output_delimiter_string = output_delimiter_default; @@ -1046,18 +1079,18 @@ index 6fd8978..faef877 100644 if (have_read_stdin && fclose (stdin) == EOF) diff --git a/src/expand-common.c b/src/expand-common.c -index deec1bd..b39f740 100644 +index 89fa56a..c102e6e 100644 --- a/src/expand-common.c +++ b/src/expand-common.c -@@ -19,6 +19,7 @@ - #include +@@ -18,6 +18,7 @@ + #include #include +#include #include "system.h" - #include "die.h" - #include "error.h" -@@ -125,6 +126,119 @@ set_increment_size (uintmax_t tabval) + #include "fadvise.h" + #include "quote.h" +@@ -122,6 +123,119 @@ set_increment_size (uintmax_t tabval) return ok; } @@ -1178,7 +1211,7 @@ index deec1bd..b39f740 100644 to the list of tab stops. */ extern void diff --git a/src/expand-common.h b/src/expand-common.h -index 5f59a0e..835b9d5 100644 +index daed31e..f6b2f68 100644 --- a/src/expand-common.h +++ b/src/expand-common.h @@ -25,6 +25,18 @@ extern size_t max_column_width; @@ -1201,7 +1234,7 @@ index 5f59a0e..835b9d5 100644 extern void add_tab_stop (uintmax_t tabval); diff --git a/src/expand.c b/src/expand.c -index ed78ca8..a4cefa1 100644 +index 0e74d0c..7080c51 100644 --- a/src/expand.c +++ b/src/expand.c @@ -37,6 +37,9 @@ @@ -1212,12 +1245,12 @@ index ed78ca8..a4cefa1 100644 +#include + #include "system.h" - #include "die.h" + #include "expand-common.h" -@@ -97,19 +100,41 @@ expand (void) +@@ -95,19 +98,41 @@ expand (void) { /* Input stream. */ - FILE *fp = next_file (NULL); + FILE *fp = next_file (nullptr); + mb_file_t mbf; + mbf_char_t c; + /* True if the starting locale is utf8. */ @@ -1260,7 +1293,7 @@ index ed78ca8..a4cefa1 100644 /* The following variables have valid values only when CONVERT is true: */ -@@ -119,17 +144,48 @@ expand (void) +@@ -117,17 +142,48 @@ expand (void) /* Index in TAB_LIST of next tab stop to examine. */ size_t tab_index = 0; @@ -1313,9 +1346,9 @@ index ed78ca8..a4cefa1 100644 { /* Column the next input tab stop is on. */ uintmax_t next_tab_column; -@@ -148,32 +204,34 @@ expand (void) +@@ -146,32 +202,34 @@ expand (void) if (putchar (' ') < 0) - die (EXIT_FAILURE, errno, _("write error")); + write_error (); - c = ' '; + mb_setascii (&c, ' '); @@ -1335,7 +1368,7 @@ index ed78ca8..a4cefa1 100644 - column++; + column += mb_width (c); if (!column) - die (EXIT_FAILURE, 0, _("input line is too long")); + error (EXIT_FAILURE, 0, _("input line is too long")); } - convert &= convert_entire_line || !! isblank (c); @@ -1349,7 +1382,7 @@ index ed78ca8..a4cefa1 100644 - if (putchar (c) < 0) + mb_putc (c, stdout); + if (ferror (stdout)) - die (EXIT_FAILURE, errno, _("write error")); + write_error (); } - while (c != '\n'); + while (!mb_iseq (c, '\n')); @@ -1357,10 +1390,10 @@ index ed78ca8..a4cefa1 100644 } diff --git a/src/fold.c b/src/fold.c -index f07a90b..d32dbfd 100644 +index 5c0428d..2372047 100644 --- a/src/fold.c +++ b/src/fold.c -@@ -22,12 +22,34 @@ +@@ -22,10 +22,32 @@ #include #include @@ -1375,8 +1408,6 @@ index f07a90b..d32dbfd 100644 +#endif + #include "system.h" - #include "die.h" - #include "error.h" #include "fadvise.h" #include "xdectoint.h" @@ -1395,7 +1426,7 @@ index f07a90b..d32dbfd 100644 #define TAB_WIDTH 8 /* The official name of this program (e.g., no 'g' prefix). */ -@@ -35,20 +57,41 @@ +@@ -33,20 +55,41 @@ #define AUTHORS proper_name ("David MacKenzie") @@ -1436,12 +1467,12 @@ index f07a90b..d32dbfd 100644 static struct option const longopts[] = { - {"bytes", no_argument, NULL, 'b'}, -+ {"characters", no_argument, NULL, 'c'}, - {"spaces", no_argument, NULL, 's'}, - {"width", required_argument, NULL, 'w'}, + {"bytes", no_argument, nullptr, 'b'}, ++ {"characters", no_argument, nullptr, 'c'}, + {"spaces", no_argument, nullptr, 's'}, + {"width", required_argument, nullptr, 'w'}, {GETOPT_HELP_OPTION_DECL}, -@@ -76,6 +119,7 @@ Wrap input lines in each FILE, writing to standard output.\n\ +@@ -74,6 +117,7 @@ Wrap input lines in each FILE, writing to standard output.\n\ fputs (_("\ -b, --bytes count bytes rather than columns\n\ @@ -1449,7 +1480,7 @@ index f07a90b..d32dbfd 100644 -s, --spaces break at spaces\n\ -w, --width=WIDTH use WIDTH columns instead of 80\n\ "), stdout); -@@ -93,7 +137,7 @@ Wrap input lines in each FILE, writing to standard output.\n\ +@@ -91,7 +135,7 @@ Wrap input lines in each FILE, writing to standard output.\n\ static size_t adjust_column (size_t column, char c) { @@ -1458,7 +1489,7 @@ index f07a90b..d32dbfd 100644 { if (c == '\b') { -@@ -116,30 +160,14 @@ adjust_column (size_t column, char c) +@@ -114,30 +158,14 @@ adjust_column (size_t column, char c) to stdout, with maximum line length WIDTH. Return true if successful. */ @@ -1471,7 +1502,7 @@ index f07a90b..d32dbfd 100644 int c; size_t column = 0; /* Screen column where next char will go. */ size_t offset_out = 0; /* Index in 'line_out' for next char. */ - static char *line_out = NULL; + static char *line_out = nullptr; static size_t allocated_out = 0; - int saved_errno; - @@ -1483,7 +1514,7 @@ index f07a90b..d32dbfd 100644 - else - istream = fopen (filename, "r"); - -- if (istream == NULL) +- if (istream == nullptr) - { - error (0, errno, "%s", quotef (filename)); - return false; @@ -1491,7 +1522,7 @@ index f07a90b..d32dbfd 100644 fadvise (istream, FADVISE_SEQUENTIAL); -@@ -169,6 +197,15 @@ fold_file (char const *filename, size_t width) +@@ -167,6 +195,15 @@ fold_file (char const *filename, size_t width) bool found_blank = false; size_t logical_end = offset_out; @@ -1507,7 +1538,7 @@ index f07a90b..d32dbfd 100644 /* Look for the last blank. */ while (logical_end) { -@@ -215,13 +252,225 @@ fold_file (char const *filename, size_t width) +@@ -213,13 +250,225 @@ fold_file (char const *filename, size_t width) line_out[offset_out++] = c; } @@ -1735,7 +1766,7 @@ index f07a90b..d32dbfd 100644 if (STREQ (filename, "-")) clearerr (istream); else if (fclose (istream) != 0 && !saved_errno) -@@ -252,7 +501,8 @@ main (int argc, char **argv) +@@ -250,7 +499,8 @@ main (int argc, char **argv) atexit (close_stdout); @@ -1743,9 +1774,9 @@ index f07a90b..d32dbfd 100644 + operating_mode = column_mode; + break_spaces = have_read_stdin = false; - while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) + while ((optc = getopt_long (argc, argv, shortopts, longopts, nullptr)) != -1) { -@@ -261,7 +511,15 @@ main (int argc, char **argv) +@@ -259,7 +509,15 @@ main (int argc, char **argv) switch (optc) { case 'b': /* Count bytes rather than columns. */ @@ -1763,10 +1794,10 @@ index f07a90b..d32dbfd 100644 case 's': /* Break at word boundaries. */ diff --git a/src/join.c b/src/join.c -index f2fd172..6c7d1ed 100644 +index 0bcfa75..8a3bcf1 100644 --- a/src/join.c +++ b/src/join.c -@@ -22,19 +22,33 @@ +@@ -21,18 +21,32 @@ #include #include @@ -1781,8 +1812,7 @@ index f2fd172..6c7d1ed 100644 +#endif + #include "system.h" - #include "die.h" - #include "error.h" + #include "assure.h" #include "fadvise.h" #include "hard-locale.h" #include "linebuffer.h" @@ -1801,7 +1831,7 @@ index f2fd172..6c7d1ed 100644 /* The official name of this program (e.g., no 'g' prefix). */ #define PROGRAM_NAME "join" -@@ -136,10 +150,12 @@ static struct outlist outlist_head; +@@ -134,10 +148,12 @@ static struct outlist outlist_head; /* Last element in 'outlist', where a new element can be added. */ static struct outlist *outlist_end = &outlist_head; @@ -1813,12 +1843,12 @@ index f2fd172..6c7d1ed 100644 + by any nonempty string of blanks. */ +static char *tab = NULL; + -+/* The number of bytes used for tab. */ ++/* The number of bytes used for tab. */ +static size_t tablen = 0; /* If nonzero, check that the input is correctly ordered. */ static enum -@@ -280,13 +296,14 @@ xfields (struct line *line) +@@ -277,13 +293,14 @@ xfields (struct line *line) if (ptr == lim) return; @@ -1827,8 +1857,8 @@ index f2fd172..6c7d1ed 100644 { + unsigned char t = tab[0]; char *sep; -- for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1) -+ for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1) +- for (; (sep = memchr (ptr, tab, lim - ptr)) != nullptr; ptr = sep + 1) ++ for (; (sep = memchr (ptr, t, lim - ptr)) != nullptr; ptr = sep + 1) extract_field (line, ptr, sep - ptr); } - else if (tab < 0) @@ -1836,7 +1866,7 @@ index f2fd172..6c7d1ed 100644 { /* Skip leading blanks before the first field. */ while (field_sep (*ptr)) -@@ -310,6 +327,147 @@ xfields (struct line *line) +@@ -307,6 +324,147 @@ xfields (struct line *line) extract_field (line, ptr, lim - ptr); } @@ -1984,18 +2014,18 @@ index f2fd172..6c7d1ed 100644 static void freeline (struct line *line) { -@@ -331,56 +489,133 @@ keycmp (struct line const *line1, struct line const *line2, - size_t jf_1, size_t jf_2) +@@ -328,56 +486,133 @@ keycmp (struct line const *line1, struct line const *line2, + idx_t jf_1, idx_t jf_2) { /* Start of field to compare in each file. */ - char *beg1; - char *beg2; - -- size_t len1; -- size_t len2; /* Length of fields to compare. */ +- idx_t len1; +- idx_t len2; /* Length of fields to compare. */ + char *beg[2]; + char *copy[2]; -+ size_t len[2]; /* Length of fields to compare. */ ++ idx_t len[2]; /* Length of fields to compare. */ int diff; + int i, j; + int mallocd = 0; @@ -2009,9 +2039,9 @@ index f2fd172..6c7d1ed 100644 } else { -- beg1 = NULL; +- beg1 = nullptr; - len1 = 0; -+ beg[0] = NULL; ++ beg[0] = nullptr; + len[0] = 0; } @@ -2024,9 +2054,9 @@ index f2fd172..6c7d1ed 100644 } else { -- beg2 = NULL; +- beg2 = nullptr; - len2 = 0; -+ beg[1] = NULL; ++ beg[1] = nullptr; + len[1] = 0; } @@ -2141,7 +2171,7 @@ index f2fd172..6c7d1ed 100644 } /* Check that successive input lines PREV and CURRENT from input file -@@ -472,6 +707,11 @@ get_line (FILE *fp, struct line **linep, int which) +@@ -469,6 +704,11 @@ get_line (FILE *fp, struct line **linep, int which) } ++line_no[which - 1]; @@ -2153,7 +2183,7 @@ index f2fd172..6c7d1ed 100644 xfields (line); if (prevline[which - 1]) -@@ -567,21 +807,28 @@ prfield (size_t n, struct line const *line) +@@ -562,21 +802,28 @@ prfield (idx_t n, struct line const *line) /* Output all the fields in line, other than the join field. */ @@ -2166,10 +2196,10 @@ index f2fd172..6c7d1ed 100644 + while (0) + static void - prfields (struct line const *line, size_t join_field, size_t autocount) + prfields (struct line const *line, idx_t join_field, idx_t autocount) { - size_t i; - size_t nfields = autoformat ? autocount : line->nfields; + idx_t i; + idx_t nfields = autoformat ? autocount : line->nfields; - char output_separator = tab < 0 ? ' ' : tab; for (i = 0; i < join_field && i < nfields; ++i) @@ -2185,24 +2215,24 @@ index f2fd172..6c7d1ed 100644 prfield (i, line); } } -@@ -592,7 +839,6 @@ static void +@@ -587,7 +834,6 @@ static void prjoin (struct line const *line1, struct line const *line2) { const struct outlist *outlist; - char output_separator = tab < 0 ? ' ' : tab; - size_t field; + idx_t field; struct line const *line; -@@ -626,7 +872,7 @@ prjoin (struct line const *line1, struct line const *line2) +@@ -621,7 +867,7 @@ prjoin (struct line const *line1, struct line const *line2) o = o->next; - if (o == NULL) + if (o == nullptr) break; - putchar (output_separator); + PUT_TAB_CHAR; } putchar (eolchar); } -@@ -1102,20 +1348,43 @@ main (int argc, char **argv) +@@ -1086,20 +1332,43 @@ main (int argc, char **argv) case 't': { @@ -2235,8 +2265,8 @@ index f2fd172..6c7d1ed 100644 - if (STREQ (optarg, "\\0")) - newtab = '\0'; - else -- die (EXIT_FAILURE, 0, _("multi-character tab %s"), -- quote (optarg)); +- error (EXIT_FAILURE, 0, _("multi-character tab %s"), +- quote (optarg)); + if (newtablen == 1 && newtab[1]) + { + if (STREQ (newtab, "\\0")) @@ -2246,20 +2276,20 @@ index f2fd172..6c7d1ed 100644 + if (tab != NULL && strcmp (tab, newtab)) + { + free (newtab); -+ die (EXIT_FAILURE, 0, _("incompatible tabs")); ++ error (EXIT_FAILURE, 0, _("incompatible tabs")); } - if (0 <= tab && tab != newtab) -- die (EXIT_FAILURE, 0, _("incompatible tabs")); +- error (EXIT_FAILURE, 0, _("incompatible tabs")); tab = newtab; + tablen = newtablen; } break; diff --git a/src/local.mk b/src/local.mk -index e1d15ce..1a5ffaa 100644 +index f45b911..6f7036a 100644 --- a/src/local.mk +++ b/src/local.mk -@@ -438,8 +438,8 @@ src_base32_CPPFLAGS = -DBASE_TYPE=32 $(AM_CPPFLAGS) +@@ -447,8 +447,8 @@ src_base32_CPPFLAGS = -DBASE_TYPE=32 $(AM_CPPFLAGS) src_basenc_SOURCES = src/basenc.c src_basenc_CPPFLAGS = -DBASE_TYPE=42 $(AM_CPPFLAGS) @@ -2271,12 +2301,12 @@ index e1d15ce..1a5ffaa 100644 src_wc_SOURCES = src/wc.c if USE_AVX2_WC_LINECOUNT diff --git a/src/pr.c b/src/pr.c -index 4c17c00..b4fab1c 100644 +index 419545c..702e025 100644 --- a/src/pr.c +++ b/src/pr.c -@@ -311,6 +311,24 @@ - +@@ -312,6 +312,24 @@ #include + #include #include + +/* Get MB_LEN_MAX. */ @@ -2297,9 +2327,9 @@ index 4c17c00..b4fab1c 100644 +#endif + #include "system.h" - #include "die.h" - #include "error.h" -@@ -325,6 +343,18 @@ + #include "fadvise.h" + #include "hard-locale.h" +@@ -324,6 +342,18 @@ #include "xstrtol-error.h" #include "xdectoint.h" @@ -2318,7 +2348,7 @@ index 4c17c00..b4fab1c 100644 /* The official name of this program (e.g., no 'g' prefix). */ #define PROGRAM_NAME "pr" -@@ -417,7 +447,20 @@ struct COLUMN +@@ -416,7 +446,20 @@ struct COLUMN typedef struct COLUMN COLUMN; @@ -2340,7 +2370,7 @@ index 4c17c00..b4fab1c 100644 static bool read_line (COLUMN *p); static bool print_page (void); static bool print_stored (COLUMN *p); -@@ -429,6 +472,7 @@ static void add_line_number (COLUMN *p); +@@ -428,6 +471,7 @@ static void add_line_number (COLUMN *p); static void getoptnum (char const *n_str, int min, int *num, char const *errfmt); static void getoptarg (char *arg, char switch_char, char *character, @@ -2348,7 +2378,7 @@ index 4c17c00..b4fab1c 100644 int *number); static void print_files (int number_of_files, char **av); static void init_parameters (int number_of_files); -@@ -442,7 +486,6 @@ static void store_char (char c); +@@ -441,7 +485,6 @@ static void store_char (char c); static void pad_down (unsigned int lines); static void read_rest_of_line (COLUMN *p); static void skip_read (COLUMN *p, int column_number); @@ -2356,7 +2386,7 @@ index 4c17c00..b4fab1c 100644 static void cleanup (void); static void print_sep_string (void); static void separator_string (char const *optarg_S); -@@ -454,7 +497,7 @@ static COLUMN *column_vector; +@@ -453,7 +496,7 @@ static COLUMN *column_vector; we store the leftmost columns contiguously in buff. To print a line from buff, get the index of the first character from line_vector[i], and print up to line_vector[i + 1]. */ @@ -2365,7 +2395,7 @@ index 4c17c00..b4fab1c 100644 /* Index of the position in buff where the next character will be stored. */ -@@ -558,7 +601,7 @@ static int chars_per_column; +@@ -557,7 +600,7 @@ static int chars_per_column; static bool untabify_input = false; /* (-e) The input tab character. */ @@ -2374,7 +2404,7 @@ index 4c17c00..b4fab1c 100644 /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ... where the leftmost column is 1. */ -@@ -568,7 +611,10 @@ static int chars_per_input_tab = 8; +@@ -567,7 +610,10 @@ static int chars_per_input_tab = 8; static bool tabify_output = false; /* (-i) The output tab character. */ @@ -2386,7 +2416,7 @@ index 4c17c00..b4fab1c 100644 /* (-i) The width of the output tab. */ static int chars_per_output_tab = 8; -@@ -638,7 +684,13 @@ static int line_number; +@@ -637,7 +683,13 @@ static int line_number; static bool numbered_lines = false; /* (-n) Character which follows each line number. */ @@ -2401,7 +2431,7 @@ index 4c17c00..b4fab1c 100644 /* (-n) line counting starts with 1st line of input file (not with 1st line of 1st page printed). */ -@@ -691,6 +743,7 @@ static bool use_col_separator = false; +@@ -690,6 +742,7 @@ static bool use_col_separator = false; -a|COLUMN|-m is a 'space' and with the -J option a 'tab'. */ static char const *col_sep_string = ""; static int col_sep_length = 0; @@ -2409,7 +2439,7 @@ index 4c17c00..b4fab1c 100644 static char *column_separator = (char *) " "; static char *line_separator = (char *) "\t"; -@@ -853,6 +906,13 @@ separator_string (char const *optarg_S) +@@ -852,6 +905,13 @@ separator_string (char const *optarg_S) integer_overflow (); col_sep_length = len; col_sep_string = optarg_S; @@ -2423,7 +2453,7 @@ index 4c17c00..b4fab1c 100644 } int -@@ -877,6 +937,21 @@ main (int argc, char **argv) +@@ -876,6 +936,21 @@ main (int argc, char **argv) atexit (close_stdout); @@ -2445,7 +2475,7 @@ index 4c17c00..b4fab1c 100644 n_files = 0; file_names = (argc > 1 ? xnmalloc (argc - 1, sizeof (char *)) -@@ -953,8 +1028,12 @@ main (int argc, char **argv) +@@ -952,8 +1027,12 @@ main (int argc, char **argv) break; case 'e': if (optarg) @@ -2460,7 +2490,7 @@ index 4c17c00..b4fab1c 100644 /* Could check tab width > 0. */ untabify_input = true; break; -@@ -967,8 +1046,12 @@ main (int argc, char **argv) +@@ -966,8 +1045,12 @@ main (int argc, char **argv) break; case 'i': if (optarg) @@ -2475,7 +2505,7 @@ index 4c17c00..b4fab1c 100644 /* Could check tab width > 0. */ tabify_output = true; break; -@@ -986,8 +1069,8 @@ main (int argc, char **argv) +@@ -985,8 +1068,8 @@ main (int argc, char **argv) case 'n': numbered_lines = true; if (optarg) @@ -2486,7 +2516,7 @@ index 4c17c00..b4fab1c 100644 break; case 'N': skip_count = false; -@@ -1012,6 +1095,7 @@ main (int argc, char **argv) +@@ -1011,6 +1094,7 @@ main (int argc, char **argv) /* Reset an additional input of -s, -S dominates -s */ col_sep_string = ""; col_sep_length = 0; @@ -2494,7 +2524,7 @@ index 4c17c00..b4fab1c 100644 use_col_separator = true; if (optarg) separator_string (optarg); -@@ -1166,10 +1250,45 @@ getoptnum (char const *n_str, int min, int *num, char const *err) +@@ -1165,7 +1249,8 @@ getoptnum (char const *n_str, int min, int *num, char const *err) a number. */ static void @@ -2502,6 +2532,11 @@ index 4c17c00..b4fab1c 100644 +getoptarg (char *arg, char switch_char, char *character, int *character_length, + int *character_width, int *number) { + if (!*arg) + { +@@ -1174,7 +1259,41 @@ getoptarg (char *arg, char switch_char, char *character, int *number) + } + if (!ISDIGIT (*arg)) - *character = *arg++; + { @@ -2542,7 +2577,7 @@ index 4c17c00..b4fab1c 100644 if (*arg) { long int tmp_long; -@@ -1198,6 +1317,11 @@ static void +@@ -1203,6 +1322,11 @@ static void init_parameters (int number_of_files) { int chars_used_by_number = 0; @@ -2554,7 +2589,7 @@ index 4c17c00..b4fab1c 100644 lines_per_body = lines_per_page - lines_per_header - lines_per_footer; if (lines_per_body <= 0) -@@ -1235,7 +1359,7 @@ init_parameters (int number_of_files) +@@ -1240,7 +1364,7 @@ init_parameters (int number_of_files) else col_sep_string = column_separator; @@ -2563,7 +2598,7 @@ index 4c17c00..b4fab1c 100644 use_col_separator = true; } /* It's rather pointless to define a TAB separator with column -@@ -1267,11 +1391,11 @@ init_parameters (int number_of_files) +@@ -1272,11 +1396,11 @@ init_parameters (int number_of_files) + TAB_WIDTH (chars_per_input_tab, chars_per_number); */ /* Estimate chars_per_text without any margin and keep it constant. */ @@ -2577,16 +2612,16 @@ index 4c17c00..b4fab1c 100644 /* The number is part of the column width unless we are printing files in parallel. */ -@@ -1280,7 +1404,7 @@ init_parameters (int number_of_files) +@@ -1285,7 +1409,7 @@ init_parameters (int number_of_files) } int sep_chars, useful_chars; -- if (INT_MULTIPLY_WRAPV (columns - 1, col_sep_length, &sep_chars)) -+ if (INT_MULTIPLY_WRAPV (columns - 1, col_sep_width, &sep_chars)) +- if (ckd_mul (&sep_chars, columns - 1, col_sep_length)) ++ if (ckd_mul (&sep_chars, columns - 1, col_sep_width)) sep_chars = INT_MAX; - if (INT_SUBTRACT_WRAPV (chars_per_line - chars_used_by_number, sep_chars, - &useful_chars)) -@@ -1303,7 +1427,7 @@ init_parameters (int number_of_files) + if (ckd_sub (&useful_chars, chars_per_line - chars_used_by_number, + sep_chars)) +@@ -1308,7 +1432,7 @@ init_parameters (int number_of_files) We've to use 8 as the lower limit, if we use chars_per_default_tab = 8 to expand a tab which is not an input_tab-char. */ free (clump_buff); @@ -2595,7 +2630,7 @@ index 4c17c00..b4fab1c 100644 } /* Open the necessary files, -@@ -1409,7 +1533,7 @@ init_funcs (void) +@@ -1414,7 +1538,7 @@ init_funcs (void) /* Enlarge p->start_position of first column to use the same form of padding_not_printed with all columns. */ @@ -2604,7 +2639,7 @@ index 4c17c00..b4fab1c 100644 /* This loop takes care of all but the rightmost column. */ -@@ -1443,7 +1567,7 @@ init_funcs (void) +@@ -1448,7 +1572,7 @@ init_funcs (void) } else { @@ -2613,7 +2648,7 @@ index 4c17c00..b4fab1c 100644 h_next = h + chars_per_column; } } -@@ -1740,9 +1864,9 @@ static void +@@ -1745,9 +1869,9 @@ static void align_column (COLUMN *p) { padding_not_printed = p->start_position; @@ -2625,7 +2660,7 @@ index 4c17c00..b4fab1c 100644 padding_not_printed = ANYWHERE; } -@@ -2017,13 +2141,13 @@ store_char (char c) +@@ -2021,13 +2145,13 @@ store_char (char c) /* May be too generous. */ buff = X2REALLOC (buff, &buff_allocated); } @@ -2641,7 +2676,7 @@ index 4c17c00..b4fab1c 100644 char *s; int num_width; -@@ -2040,22 +2164,24 @@ add_line_number (COLUMN *p) +@@ -2044,22 +2168,24 @@ add_line_number (COLUMN *p) /* Tabification is assumed for multiple columns, also for n-separators, but 'default n-separator = TAB' hasn't been given priority over equal column_width also specified by POSIX. */ @@ -2670,7 +2705,7 @@ index 4c17c00..b4fab1c 100644 output_position = POS_AFTER_TAB (chars_per_output_tab, output_position); } -@@ -2214,7 +2340,7 @@ print_white_space (void) +@@ -2218,7 +2344,7 @@ print_white_space (void) while (goal - h_old > 1 && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal) { @@ -2679,7 +2714,7 @@ index 4c17c00..b4fab1c 100644 h_old = h_new; } while (++h_old <= goal) -@@ -2234,6 +2360,7 @@ print_sep_string (void) +@@ -2238,6 +2364,7 @@ print_sep_string (void) { char const *s = col_sep_string; int l = col_sep_length; @@ -2687,7 +2722,7 @@ index 4c17c00..b4fab1c 100644 if (separators_not_printed <= 0) { -@@ -2245,6 +2372,7 @@ print_sep_string (void) +@@ -2249,6 +2376,7 @@ print_sep_string (void) { for (; separators_not_printed > 0; --separators_not_printed) { @@ -2695,7 +2730,7 @@ index 4c17c00..b4fab1c 100644 while (l-- > 0) { /* 3 types of sep_strings: spaces only, spaces and chars, -@@ -2258,12 +2386,15 @@ print_sep_string (void) +@@ -2262,12 +2390,15 @@ print_sep_string (void) } else { @@ -2712,7 +2747,7 @@ index 4c17c00..b4fab1c 100644 /* sep_string ends with some spaces */ if (spaces_not_printed > 0) print_white_space (); -@@ -2291,7 +2422,7 @@ print_clump (COLUMN *p, int n, char *clump) +@@ -2295,7 +2426,7 @@ print_clump (COLUMN *p, int n, char *clump) required number of tabs and spaces. */ static void @@ -2721,7 +2756,7 @@ index 4c17c00..b4fab1c 100644 { if (tabify_output) { -@@ -2315,6 +2446,74 @@ print_char (char c) +@@ -2319,6 +2450,74 @@ print_char (char c) putchar (c); } @@ -2796,7 +2831,7 @@ index 4c17c00..b4fab1c 100644 /* Skip to page PAGE before printing. PAGE may be larger than total number of pages. */ -@@ -2492,9 +2691,9 @@ read_line (COLUMN *p) +@@ -2496,9 +2695,9 @@ read_line (COLUMN *p) align_empty_cols = false; } @@ -2808,7 +2843,7 @@ index 4c17c00..b4fab1c 100644 padding_not_printed = ANYWHERE; } -@@ -2563,7 +2762,7 @@ print_stored (COLUMN *p) +@@ -2567,7 +2766,7 @@ print_stored (COLUMN *p) COLUMN *q; int line = p->current_line++; @@ -2817,7 +2852,7 @@ index 4c17c00..b4fab1c 100644 /* FIXME UMR: Uninitialized memory read: * This is occurring while in: -@@ -2575,7 +2774,7 @@ print_stored (COLUMN *p) +@@ -2579,7 +2778,7 @@ print_stored (COLUMN *p) xmalloc [xmalloc.c:94] init_store_cols [pr.c:1648] */ @@ -2826,7 +2861,7 @@ index 4c17c00..b4fab1c 100644 pad_vertically = true; -@@ -2595,9 +2794,9 @@ print_stored (COLUMN *p) +@@ -2599,9 +2798,9 @@ print_stored (COLUMN *p) } } @@ -2838,7 +2873,7 @@ index 4c17c00..b4fab1c 100644 padding_not_printed = ANYWHERE; } -@@ -2610,8 +2809,8 @@ print_stored (COLUMN *p) +@@ -2614,8 +2813,8 @@ print_stored (COLUMN *p) if (spaces_not_printed == 0) { output_position = p->start_position + end_vector[line]; @@ -2849,7 +2884,7 @@ index 4c17c00..b4fab1c 100644 } return true; -@@ -2630,7 +2829,7 @@ print_stored (COLUMN *p) +@@ -2634,7 +2833,7 @@ print_stored (COLUMN *p) number of characters is 1.) */ static int @@ -2858,7 +2893,7 @@ index 4c17c00..b4fab1c 100644 { unsigned char uc = c; char *s = clump_buff; -@@ -2640,10 +2839,10 @@ char_to_clump (char c) +@@ -2644,10 +2843,10 @@ char_to_clump (char c) int chars; int chars_per_c = 8; @@ -2871,7 +2906,7 @@ index 4c17c00..b4fab1c 100644 { width = TAB_WIDTH (chars_per_c, input_position); -@@ -2724,6 +2923,164 @@ char_to_clump (char c) +@@ -2728,6 +2927,164 @@ char_to_clump (char c) return chars; } @@ -3037,13 +3072,13 @@ index 4c17c00..b4fab1c 100644 looking for more options and printing the next batch of files. diff --git a/src/sort.c b/src/sort.c -index 3b775d6..a0ba243 100644 +index e779845..1f5c337 100644 --- a/src/sort.c +++ b/src/sort.c -@@ -29,6 +29,14 @@ +@@ -28,6 +28,14 @@ + #include #include #include - #include +#if HAVE_WCHAR_H +# include +#endif @@ -3054,8 +3089,8 @@ index 3b775d6..a0ba243 100644 + #include "system.h" #include "argmatch.h" - #include "die.h" -@@ -159,14 +167,39 @@ static int thousands_sep; + #include "assure.h" +@@ -157,14 +165,39 @@ static int thousands_sep; /* We currently ignore multi-byte grouping chars. */ static bool thousands_sep_ignored; @@ -3096,7 +3131,7 @@ index 3b775d6..a0ba243 100644 /* The kind of blanks for '-b' to skip in various options. */ enum blanktype { bl_start, bl_end, bl_both }; -@@ -343,13 +376,11 @@ static bool stable; +@@ -341,13 +374,11 @@ static bool stable; /* An int value outside char range. */ enum { NON_CHAR = CHAR_MAX + 1 }; @@ -3113,7 +3148,7 @@ index 3b775d6..a0ba243 100644 /* Flag to remove consecutive duplicate lines from the output. Only the last of a sequence of equal lines will be output. */ -@@ -805,6 +836,46 @@ reap_all (void) +@@ -803,6 +834,46 @@ reap_all (void) reap (-1); } @@ -3160,7 +3195,7 @@ index 3b775d6..a0ba243 100644 /* Clean up any remaining temporary files. */ static void -@@ -1272,7 +1343,7 @@ zaptemp (char const *name) +@@ -1270,7 +1341,7 @@ zaptemp (char const *name) free (node); } @@ -3169,7 +3204,7 @@ index 3b775d6..a0ba243 100644 static int struct_month_cmp (void const *m1, void const *m2) -@@ -1287,7 +1358,7 @@ struct_month_cmp (void const *m1, void const *m2) +@@ -1285,7 +1356,7 @@ struct_month_cmp (void const *m1, void const *m2) /* Initialize the character class tables. */ static void @@ -3178,7 +3213,7 @@ index 3b775d6..a0ba243 100644 { size_t i; -@@ -1299,7 +1370,7 @@ inittables (void) +@@ -1297,7 +1368,7 @@ inittables (void) fold_toupper[i] = toupper (i); } @@ -3187,7 +3222,7 @@ index 3b775d6..a0ba243 100644 /* If we're not in the "C" locale, read different names for months. */ if (hard_LC_TIME) { -@@ -1381,6 +1452,84 @@ specify_nmerge (int oi, char c, char const *s) +@@ -1379,6 +1450,84 @@ specify_nmerge (int oi, char c, char const *s) xstrtol_fatal (e, oi, c, long_options, s); } @@ -3272,7 +3307,7 @@ index 3b775d6..a0ba243 100644 /* Specify the amount of main memory to use when sorting. */ static void specify_sort_size (int oi, char c, char const *s) -@@ -1612,7 +1761,7 @@ buffer_linelim (struct buffer const *buf) +@@ -1610,7 +1759,7 @@ buffer_linelim (struct buffer const *buf) by KEY in LINE. */ static char * @@ -3281,7 +3316,7 @@ index 3b775d6..a0ba243 100644 { char *ptr = line->text, *lim = ptr + line->length - 1; size_t sword = key->sword; -@@ -1621,10 +1770,10 @@ begfield (struct line const *line, struct keyfield const *key) +@@ -1619,10 +1768,10 @@ begfield (struct line const *line, struct keyfield const *key) /* The leading field separator itself is included in a field when -t is absent. */ @@ -3294,7 +3329,7 @@ index 3b775d6..a0ba243 100644 ++ptr; if (ptr < lim) ++ptr; -@@ -1650,12 +1799,71 @@ begfield (struct line const *line, struct keyfield const *key) +@@ -1648,12 +1797,71 @@ begfield (struct line const *line, struct keyfield const *key) return ptr; } @@ -3367,7 +3402,7 @@ index 3b775d6..a0ba243 100644 { char *ptr = line->text, *lim = ptr + line->length - 1; size_t eword = key->eword, echar = key->echar; -@@ -1670,10 +1878,10 @@ limfield (struct line const *line, struct keyfield const *key) +@@ -1668,10 +1876,10 @@ limfield (struct line const *line, struct keyfield const *key) 'beginning' is the first character following the delimiting TAB. Otherwise, leave PTR pointing at the first 'blank' character after the preceding field. */ @@ -3380,7 +3415,7 @@ index 3b775d6..a0ba243 100644 ++ptr; if (ptr < lim && (eword || echar)) ++ptr; -@@ -1719,10 +1927,10 @@ limfield (struct line const *line, struct keyfield const *key) +@@ -1717,10 +1925,10 @@ limfield (struct line const *line, struct keyfield const *key) */ /* Make LIM point to the end of (one byte past) the current field. */ @@ -3393,7 +3428,7 @@ index 3b775d6..a0ba243 100644 if (newlim) lim = newlim; } -@@ -1753,6 +1961,130 @@ limfield (struct line const *line, struct keyfield const *key) +@@ -1751,6 +1959,130 @@ limfield (struct line const *line, struct keyfield const *key) return ptr; } @@ -3524,7 +3559,7 @@ index 3b775d6..a0ba243 100644 /* Fill BUF reading from FP, moving buf->left bytes from the end of buf->buf to the beginning first. If EOF is reached and the file wasn't terminated by a newline, supply one. Set up BUF's line -@@ -1839,8 +2171,22 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file) +@@ -1837,8 +2169,22 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file) else { if (key->skipsblanks) @@ -3549,7 +3584,7 @@ index 3b775d6..a0ba243 100644 line->keybeg = line_start; } } -@@ -1978,12 +2324,10 @@ find_unit_order (char const *number) +@@ -1976,12 +2322,10 @@ find_unit_order (char const *number) ATTRIBUTE_PURE static int @@ -3565,7 +3600,7 @@ index 3b775d6..a0ba243 100644 int diff = find_unit_order (a) - find_unit_order (b); return (diff ? diff : strnumcmp (a, b, decimal_point, thousands_sep)); -@@ -1995,7 +2339,7 @@ human_numcompare (char const *a, char const *b) +@@ -1993,7 +2337,7 @@ human_numcompare (char const *a, char const *b) ATTRIBUTE_PURE static int @@ -3574,7 +3609,7 @@ index 3b775d6..a0ba243 100644 { while (blanks[to_uchar (*a)]) a++; -@@ -2005,6 +2349,25 @@ numcompare (char const *a, char const *b) +@@ -2003,6 +2347,25 @@ numcompare (char const *a, char const *b) return strnumcmp (a, b, decimal_point, thousands_sep); } @@ -3600,7 +3635,7 @@ index 3b775d6..a0ba243 100644 static int nan_compare (long double a, long double b) { -@@ -2046,7 +2409,7 @@ general_numcompare (char const *sa, char const *sb) +@@ -2044,7 +2407,7 @@ general_numcompare (char const *sa, char const *sb) Return 0 if the name in S is not recognized. */ static int @@ -3609,7 +3644,7 @@ index 3b775d6..a0ba243 100644 { size_t lo = 0; size_t hi = MONTHS_PER_YEAR; -@@ -2322,15 +2685,14 @@ debug_key (struct line const *line, struct keyfield const *key) +@@ -2320,15 +2683,14 @@ debug_key (struct line const *line, struct keyfield const *key) char saved = *lim; *lim = '\0'; @@ -3627,7 +3662,7 @@ index 3b775d6..a0ba243 100644 else if (key->general_numeric) ignore_value (strtold (beg, &tighter_lim)); else if (key->numeric || key->human_numeric) -@@ -2476,7 +2838,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) +@@ -2474,7 +2836,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) /* Warn about significant leading blanks. */ bool implicit_skip = key_numeric (key) || key->month; bool line_offset = key->eword == 0 && key->echar != 0; /* -k1.x,1.y */ @@ -3636,7 +3671,7 @@ index 3b775d6..a0ba243 100644 && ((!key->skipsblanks && !implicit_skip) || (!key->skipsblanks && key->schar) || (!key->skipeblanks && key->echar))) -@@ -2524,9 +2886,9 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) +@@ -2522,9 +2884,9 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) bool number_locale_warned = false; if (basic_numeric_field_span) { @@ -3649,7 +3684,7 @@ index 3b775d6..a0ba243 100644 { error (0, 0, _("field separator %s is treated as a " -@@ -2537,9 +2899,9 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) +@@ -2535,9 +2897,9 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) } if (basic_numeric_field_span || general_numeric_field_span) { @@ -3662,7 +3697,7 @@ index 3b775d6..a0ba243 100644 { error (0, 0, _("field separator %s is treated as a " -@@ -2547,19 +2909,19 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) +@@ -2545,19 +2907,19 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) quote (((char []) {decimal_point, 0}))); number_locale_warned = true; } @@ -3686,7 +3721,7 @@ index 3b775d6..a0ba243 100644 } } -@@ -2570,7 +2932,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) +@@ -2568,7 +2930,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) { error (0, 0, _("%snumbers use %s as a decimal point in this locale"), @@ -3695,7 +3730,7 @@ index 3b775d6..a0ba243 100644 quote (((char []) {decimal_point, 0}))); } -@@ -2612,11 +2974,87 @@ diff_reversed (int diff, bool reversed) +@@ -2610,11 +2972,87 @@ diff_reversed (int diff, bool reversed) return reversed ? (diff < 0) - (diff > 0) : diff; } @@ -3784,16 +3819,16 @@ index 3b775d6..a0ba243 100644 { struct keyfield *key = keylist; -@@ -2697,7 +3135,7 @@ keycompare (struct line const *a, struct line const *b) +@@ -2695,7 +3133,7 @@ keycompare (struct line const *a, struct line const *b) else if (key->human_numeric) diff = human_numcompare (ta, tb); else if (key->month) -- diff = getmonth (ta, NULL) - getmonth (tb, NULL); -+ diff = getmonth (ta, tlena, NULL) - getmonth (tb, tlenb, NULL); +- diff = getmonth (ta, nullptr) - getmonth (tb, nullptr); ++ diff = getmonth (ta, tlena, nullptr) - getmonth (tb, tlenb, nullptr); else if (key->random) diff = compare_random (ta, tlena, tb, tlenb); else if (key->version) -@@ -2807,6 +3245,211 @@ keycompare (struct line const *a, struct line const *b) +@@ -2805,6 +3243,211 @@ keycompare (struct line const *a, struct line const *b) return diff_reversed (diff, key->reverse); } @@ -4005,7 +4040,7 @@ index 3b775d6..a0ba243 100644 /* Compare two lines A and B, returning negative, zero, or positive depending on whether A compares less than, equal to, or greater than B. */ -@@ -2834,7 +3477,7 @@ compare (struct line const *a, struct line const *b) +@@ -2832,7 +3475,7 @@ compare (struct line const *a, struct line const *b) diff = - NONZERO (blen); else if (blen == 0) diff = 1; @@ -4014,7 +4049,7 @@ index 3b775d6..a0ba243 100644 { /* xmemcoll0 is a performance enhancement as it will not unconditionally write '\0' after the -@@ -4222,6 +4865,7 @@ set_ordering (char const *s, struct keyfield *key, enum blanktype blanktype) +@@ -4220,6 +4863,7 @@ set_ordering (char const *s, struct keyfield *key, enum blanktype blanktype) break; case 'f': key->translate = fold_toupper; @@ -4022,7 +4057,7 @@ index 3b775d6..a0ba243 100644 break; case 'g': key->general_numeric = true; -@@ -4301,7 +4945,7 @@ main (int argc, char **argv) +@@ -4299,7 +4943,7 @@ main (int argc, char **argv) initialize_exit_failure (SORT_FAILURE); hard_LC_COLLATE = hard_locale (LC_COLLATE); @@ -4031,7 +4066,7 @@ index 3b775d6..a0ba243 100644 hard_LC_TIME = hard_locale (LC_TIME); #endif -@@ -4324,6 +4968,29 @@ main (int argc, char **argv) +@@ -4322,6 +4966,29 @@ main (int argc, char **argv) thousands_sep = NON_CHAR; } @@ -4061,7 +4096,7 @@ index 3b775d6..a0ba243 100644 have_read_stdin = false; inittables (); -@@ -4598,13 +5265,34 @@ main (int argc, char **argv) +@@ -4592,13 +5259,34 @@ main (int argc, char **argv) case 't': { @@ -4071,7 +4106,7 @@ index 3b775d6..a0ba243 100644 + size_t newtab_length = 1; + strncpy (newtab, optarg, MB_LEN_MAX); + if (! newtab[0]) - die (SORT_FAILURE, 0, _("empty tab")); + error (SORT_FAILURE, 0, _("empty tab")); - if (optarg[1]) +#if HAVE_MBRTOWC + if (MB_CUR_MAX > 1) @@ -4100,14 +4135,14 @@ index 3b775d6..a0ba243 100644 else { /* Provoke with 'sort -txx'. Complain about -@@ -4615,9 +5303,11 @@ main (int argc, char **argv) - quote (optarg)); +@@ -4609,9 +5297,11 @@ main (int argc, char **argv) + quote (optarg)); } } - if (tab != TAB_DEFAULT && tab != newtab) + if (tab_length && (tab_length != newtab_length + || memcmp (tab, newtab, tab_length) != 0)) - die (SORT_FAILURE, 0, _("incompatible tabs")); + error (SORT_FAILURE, 0, _("incompatible tabs")); - tab = newtab; + memcpy (tab, newtab, newtab_length); + tab_length = newtab_length; @@ -4115,7 +4150,7 @@ index 3b775d6..a0ba243 100644 break; diff --git a/src/unexpand.c b/src/unexpand.c -index 7d6100f..04cd646 100644 +index 5a2283f..f24ef76 100644 --- a/src/unexpand.c +++ b/src/unexpand.c @@ -38,6 +38,9 @@ @@ -4126,12 +4161,12 @@ index 7d6100f..04cd646 100644 +#include + #include "system.h" - #include "die.h" + #include "expand-common.h" -@@ -106,24 +109,47 @@ unexpand (void) +@@ -104,24 +107,47 @@ unexpand (void) { /* Input stream. */ - FILE *fp = next_file (NULL); + FILE *fp = next_file (nullptr); + mb_file_t mbf; /* The array of pending blanks. In non-POSIX locales, blanks can @@ -4179,7 +4214,7 @@ index 7d6100f..04cd646 100644 /* If true, perform translations. */ bool convert = true; -@@ -157,12 +183,44 @@ unexpand (void) +@@ -155,12 +181,44 @@ unexpand (void) do { @@ -4227,9 +4262,9 @@ index 7d6100f..04cd646 100644 if (blank) { -@@ -179,16 +237,16 @@ unexpand (void) +@@ -177,16 +235,16 @@ unexpand (void) if (next_tab_column < column) - die (EXIT_FAILURE, 0, _("input line is too long")); + error (EXIT_FAILURE, 0, _("input line is too long")); - if (c == '\t') + if (mb_iseq (c, '\t')) @@ -4247,7 +4282,7 @@ index 7d6100f..04cd646 100644 if (! (prev_blank && column == next_tab_column)) { -@@ -196,13 +254,14 @@ unexpand (void) +@@ -194,13 +252,14 @@ unexpand (void) will be replaced by tabs. */ if (column == next_tab_column) one_blank_before_tab_stop = true; @@ -4264,7 +4299,7 @@ index 7d6100f..04cd646 100644 } /* Discard pending blanks, unless it was a single -@@ -210,7 +269,7 @@ unexpand (void) +@@ -208,7 +267,7 @@ unexpand (void) pending = one_blank_before_tab_stop; } } @@ -4273,7 +4308,7 @@ index 7d6100f..04cd646 100644 { /* Go back one column, and force recalculation of the next tab stop. */ -@@ -220,16 +279,20 @@ unexpand (void) +@@ -218,16 +277,20 @@ unexpand (void) } else { @@ -4282,7 +4317,7 @@ index 7d6100f..04cd646 100644 + const uintmax_t orig_column = column; + column += mb_width (c); + if (column < orig_column) - die (EXIT_FAILURE, 0, _("input line is too long")); + error (EXIT_FAILURE, 0, _("input line is too long")); } if (pending) @@ -4295,10 +4330,10 @@ index 7d6100f..04cd646 100644 + for (int n = 0; n < pending; ++n) + mb_putc (pending_blank[n], stdout); + if (ferror (stdout)) - die (EXIT_FAILURE, errno, _("write error")); + write_error (); pending = 0; one_blank_before_tab_stop = false; -@@ -239,16 +302,17 @@ unexpand (void) +@@ -237,16 +300,17 @@ unexpand (void) convert &= convert_entire_line || blank; } @@ -4312,7 +4347,7 @@ index 7d6100f..04cd646 100644 - if (putchar (c) < 0) + mb_putc (c, stdout); + if (ferror (stdout)) - die (EXIT_FAILURE, errno, _("write error")); + write_error (); } - while (c != '\n'); + while (!mb_iseq (c, '\n')); @@ -4320,7 +4355,7 @@ index 7d6100f..04cd646 100644 } diff --git a/src/uniq.c b/src/uniq.c -index e5996f0..871d47c 100644 +index fab04de..2e96dcb 100644 --- a/src/uniq.c +++ b/src/uniq.c @@ -21,6 +21,17 @@ @@ -4341,7 +4376,7 @@ index e5996f0..871d47c 100644 #include "system.h" #include "argmatch.h" #include "linebuffer.h" -@@ -33,6 +44,18 @@ +@@ -31,6 +42,18 @@ #include "memcasecmp.h" #include "quote.h" @@ -4360,7 +4395,7 @@ index e5996f0..871d47c 100644 /* The official name of this program (e.g., no 'g' prefix). */ #define PROGRAM_NAME "uniq" -@@ -139,6 +162,10 @@ enum +@@ -137,6 +160,10 @@ enum GROUP_OPTION = CHAR_MAX + 1 }; @@ -4370,8 +4405,8 @@ index e5996f0..871d47c 100644 + static struct option const longopts[] = { - {"count", no_argument, NULL, 'c'}, -@@ -254,7 +281,7 @@ size_opt (char const *opt, char const *msgid) + {"count", no_argument, nullptr, 'c'}, +@@ -252,7 +279,7 @@ size_opt (char const *opt, char const *msgid) ATTRIBUTE_PURE static char * @@ -4380,7 +4415,7 @@ index e5996f0..871d47c 100644 { size_t count; char const *lp = line->buffer; -@@ -274,6 +301,83 @@ find_field (struct linebuffer const *line) +@@ -272,6 +299,83 @@ find_field (struct linebuffer const *line) return line->buffer + i; } @@ -4464,7 +4499,7 @@ index e5996f0..871d47c 100644 /* Return false if two strings OLD and NEW match, true if not. OLD and NEW point not to the beginnings of the lines but rather to the beginnings of the fields to compare. -@@ -494,6 +598,19 @@ main (int argc, char **argv) +@@ -495,6 +599,19 @@ main (int argc, char **argv) atexit (close_stdout); @@ -4485,7 +4520,7 @@ index e5996f0..871d47c 100644 skip_fields = 0; check_chars = SIZE_MAX; diff --git a/tests/Coreutils.pm b/tests/Coreutils.pm -index fad7ab9..c9021a6 100644 +index f147401..3ce5da9 100644 --- a/tests/Coreutils.pm +++ b/tests/Coreutils.pm @@ -269,6 +269,9 @@ sub run_tests ($$$$$) @@ -4723,19 +4758,19 @@ index 0000000..26c95de + +Exit $fail diff --git a/tests/local.mk b/tests/local.mk -index 0f77786..dbe1843 100644 +index b74a4a2..fe6e557 100644 --- a/tests/local.mk +++ b/tests/local.mk -@@ -381,6 +381,8 @@ all_tests = \ - tests/misc/sort-discrim.sh \ - tests/misc/sort-files0-from.pl \ - tests/misc/sort-float.sh \ +@@ -384,6 +384,8 @@ all_tests = \ + tests/sort/sort-discrim.sh \ + tests/sort/sort-files0-from.pl \ + tests/sort/sort-float.sh \ + tests/misc/sort-mb-tests.sh \ + tests/i18n/sort.sh \ - tests/misc/sort-h-thousands-sep.sh \ - tests/misc/sort-merge.pl \ - tests/misc/sort-merge-fdlimit.sh \ -@@ -582,6 +584,7 @@ all_tests = \ + tests/sort/sort-h-thousands-sep.sh \ + tests/sort/sort-merge.pl \ + tests/sort/sort-merge-fdlimit.sh \ +@@ -585,6 +587,7 @@ all_tests = \ tests/du/threshold.sh \ tests/du/trailing-slash.sh \ tests/du/two-args.sh \ @@ -4743,7 +4778,7 @@ index 0f77786..dbe1843 100644 tests/id/gnu-zero-uids.sh \ tests/id/no-context.sh \ tests/id/context.sh \ -@@ -734,6 +737,7 @@ all_tests = \ +@@ -738,6 +741,7 @@ all_tests = \ tests/touch/read-only.sh \ tests/touch/relative.sh \ tests/touch/trailing-slash.sh \ @@ -4752,7 +4787,7 @@ index 0f77786..dbe1843 100644 # See tests/factor/create-test.sh. diff --git a/tests/misc/expand.pl b/tests/misc/expand.pl -index 7a77e6f..27f6652 100755 +index 06261ac..7dd813e 100755 --- a/tests/misc/expand.pl +++ b/tests/misc/expand.pl @@ -27,6 +27,15 @@ my $prog = 'expand'; @@ -4819,7 +4854,7 @@ index 7a77e6f..27f6652 100755 my $verbose = $ENV{VERBOSE}; diff --git a/tests/misc/fold.pl b/tests/misc/fold.pl -index 2834f92..bc1616a 100755 +index a94072f..136a82e 100755 --- a/tests/misc/fold.pl +++ b/tests/misc/fold.pl @@ -20,9 +20,18 @@ use strict; @@ -4892,7 +4927,7 @@ index 2834f92..bc1616a 100755 my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose); exit $fail; diff --git a/tests/misc/join.pl b/tests/misc/join.pl -index 06ad777..be40204 100755 +index 2ca8567..1d01a3d 100755 --- a/tests/misc/join.pl +++ b/tests/misc/join.pl @@ -25,6 +25,15 @@ my $limits = getlimits (); @@ -5012,32 +5047,29 @@ index 0000000..11836ba +compare exp out || { fail=1; cat out; } + +Exit $fail -diff --git a/tests/misc/sort-merge.pl b/tests/misc/sort-merge.pl -index 7eb4574..eda884c 100755 ---- a/tests/misc/sort-merge.pl -+++ b/tests/misc/sort-merge.pl -@@ -26,6 +26,15 @@ my $prog = 'sort'; - # Turn off localization of executable's output. - @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; +diff --git a/tests/misc/unexpand.pl b/tests/misc/unexpand.pl +index d78a1bc..2b9137d 100755 +--- a/tests/misc/unexpand.pl ++++ b/tests/misc/unexpand.pl +@@ -27,6 +27,14 @@ my $limits = getlimits (); -+my $mb_locale; -+# uncommented according to upstream commit enabling multibyte paths -+$mb_locale = $ENV{LOCALE_FR_UTF8}; + my $prog = 'unexpand'; + ++# comment out next line to disable multibyte tests ++my $mb_locale = $ENV{LOCALE_FR_UTF8}; +! defined $mb_locale || $mb_locale eq 'none' + and $mb_locale = 'C'; + +my $try = "Try \`$prog --help' for more information.\n"; +my $inval = "$prog: invalid byte, character or field list\n$try"; + - # three empty files and one that says 'foo' - my @inputs = (+(map{{IN=> {"empty$_"=> ''}}}1..3), {IN=> {foo=> "foo\n"}}); - -@@ -77,6 +86,39 @@ my @Tests = - {OUT=>$big_input}], + my @Tests = + ( + ['a1', {IN=> ' 'x 1 ."y\n"}, {OUT=> ' 'x 1 ."y\n"}], +@@ -128,6 +136,37 @@ my @Tests = + ['ts2', '-t5,8', {IN=>"x\t \t y\n"}, {OUT=>"x\t\t y\n"}], ); -+# Add _POSIX2_VERSION=199209 to the environment of each test -+# that uses an old-style option like +1. +if ($mb_locale ne 'C') + { + # Duplicate each test vector, appending "-mb" to the test name and @@ -5049,7 +5081,7 @@ index 7eb4574..eda884c 100755 + my @new_t = @$t; + my $test_name = shift @new_t; + -+ # Depending on whether sort is multi-byte-patched, ++ # Depending on whether unexpand is multi-byte-patched, + # it emits different diagnostics: + # non-MB: invalid byte or field list + # MB: invalid byte, character or field list @@ -5061,7 +5093,7 @@ index 7eb4574..eda884c 100755 + push @new_t, $sub; + push @$t, $sub; + } -+ next if ($test_name =~ "nmerge-."); ++ next if ($test_name =~ 'b-1'); + push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; + } + push @Tests, @new; @@ -5072,43 +5104,44 @@ index 7eb4574..eda884c 100755 my $save_temps = $ENV{DEBUG}; my $verbose = $ENV{VERBOSE}; -diff --git a/tests/misc/sort.pl b/tests/misc/sort.pl -index 0b0adca..fd27821 100755 ---- a/tests/misc/sort.pl -+++ b/tests/misc/sort.pl -@@ -24,10 +24,15 @@ my $prog = 'sort'; - # Turn off localization of executable's output. - @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; +diff --git a/tests/pr/pr-tests.pl b/tests/pr/pr-tests.pl +index eafc13d..c1eca2a 100755 +--- a/tests/pr/pr-tests.pl ++++ b/tests/pr/pr-tests.pl +@@ -24,6 +24,15 @@ use strict; + my $prog = 'pr'; + my $normalize_strerror = "s/': .*/'/"; --my $mb_locale = $ENV{LOCALE_FR_UTF8}; +my $mb_locale; -+#Comment out next line to disable multibyte tests ++#Uncomment the following line to enable multibyte tests +$mb_locale = $ENV{LOCALE_FR_UTF8}; - ! defined $mb_locale || $mb_locale eq 'none' - and $mb_locale = 'C'; - ++! defined $mb_locale || $mb_locale eq 'none' ++ and $mb_locale = 'C'; ++ +my $try = "Try \`$prog --help' for more information.\n"; +my $inval = "$prog: invalid byte, character or field list\n$try"; + - # Since each test is run with a file name and with redirected stdin, - # the name in the diagnostic is either the file name or "-". - # Normalize each diagnostic to use '-'. -@@ -423,6 +428,38 @@ foreach my $t (@Tests) - } - } + my @tv = ( + + # -b option is no longer an official option. But it's still working to +@@ -515,8 +524,48 @@ push @Tests, + {IN=>"x\tx\tx\tx\tx\nx\tx\tx\tx\tx\n"}, + {OUT=>"x\tx\tx\tx\tx\tx\tx\tx\tx\tx\n"} ]; ++# Add _POSIX2_VERSION=199209 to the environment of each test ++# that uses an old-style option like +1. +if ($mb_locale ne 'C') -+ { ++ { + # Duplicate each test vector, appending "-mb" to the test name and + # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we + # provide coverage for the distro-added multi-byte code paths. + my @new; + foreach my $t (@Tests) -+ { ++ { + my @new_t = @$t; + my $test_name = shift @new_t; + -+ # Depending on whether sort is multi-byte-patched, ++ # Depending on whether pr is multi-byte-patched, + # it emits different diagnostics: + # non-MB: invalid byte or field list + # MB: invalid byte, character or field list @@ -5120,49 +5153,52 @@ index 0b0adca..fd27821 100755 + push @new_t, $sub; + push @$t, $sub; + } -+ #disable several failing tests until investigation, disable all tests with envvars set -+ next if (grep {ref $_ eq 'HASH' && exists $_->{ENV}} (@new_t)); -+ next if ($test_name =~ "18g" or $test_name =~ "sort-numeric" or $test_name =~ "08[ab]" or $test_name =~ "03[def]" or $test_name =~ "h4" or $test_name =~ "n1" or $test_name =~ "2[01]a"); -+ next if ($test_name =~ "11[ab]"); # avoid FP: expected result differs to MB result due to collation rules. ++ #temporarily skip some failing tests ++ next if ($test_name =~ "col-0" or $test_name =~ "col-inval" or $test_name =~ "asan1"); + push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; -+ } ++ } + push @Tests, @new; -+ } ++ } + @Tests = triple_test \@Tests; - # Remember that triple_test creates from each test with exactly one "IN" -@@ -432,6 +469,7 @@ foreach my $t (@Tests) - # Remove the IN_PIPE version of the "output-is-input" test above. - # The others aren't susceptible because they have three inputs each. - @Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; -+@Tests = grep {$_->[0] ne 'output-is-input-mb.p'} @Tests; - ++# Remember that triple_test creates from each test with exactly one "IN" ++# file two more tests (.p and .r suffix on name) corresponding to reading ++# input from a file and from a pipe. The pipe-reading test would fail ++# due to a race condition about 1 in 20 times. ++# Remove the IN_PIPE version of the "output-is-input" test above. ++# The others aren't susceptible because they have three inputs each. ++@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; ++ my $save_temps = $ENV{DEBUG}; my $verbose = $ENV{VERBOSE}; -diff --git a/tests/misc/unexpand.pl b/tests/misc/unexpand.pl -index 2e1906f..fe66012 100755 ---- a/tests/misc/unexpand.pl -+++ b/tests/misc/unexpand.pl -@@ -27,6 +27,14 @@ my $limits = getlimits (); - my $prog = 'unexpand'; +diff --git a/tests/sort/sort-merge.pl b/tests/sort/sort-merge.pl +index bd439ef..2ccdf87 100755 +--- a/tests/sort/sort-merge.pl ++++ b/tests/sort/sort-merge.pl +@@ -26,6 +26,15 @@ my $prog = 'sort'; + # Turn off localization of executable's output. + @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; -+# comment out next line to disable multibyte tests -+my $mb_locale = $ENV{LOCALE_FR_UTF8}; ++my $mb_locale; ++# uncommented according to upstream commit enabling multibyte paths ++$mb_locale = $ENV{LOCALE_FR_UTF8}; +! defined $mb_locale || $mb_locale eq 'none' + and $mb_locale = 'C'; + +my $try = "Try \`$prog --help' for more information.\n"; +my $inval = "$prog: invalid byte, character or field list\n$try"; + - my @Tests = - ( - ['a1', {IN=> ' 'x 1 ."y\n"}, {OUT=> ' 'x 1 ."y\n"}], -@@ -128,6 +136,37 @@ my @Tests = - ['ts2', '-t5,8', {IN=>"x\t \t y\n"}, {OUT=>"x\t\t y\n"}], + # three empty files and one that says 'foo' + my @inputs = (+(map{{IN=> {"empty$_"=> ''}}}1..3), {IN=> {foo=> "foo\n"}}); + +@@ -77,6 +86,39 @@ my @Tests = + {OUT=>$big_input}], ); ++# Add _POSIX2_VERSION=199209 to the environment of each test ++# that uses an old-style option like +1. +if ($mb_locale ne 'C') + { + # Duplicate each test vector, appending "-mb" to the test name and @@ -5174,7 +5210,7 @@ index 2e1906f..fe66012 100755 + my @new_t = @$t; + my $test_name = shift @new_t; + -+ # Depending on whether unexpand is multi-byte-patched, ++ # Depending on whether sort is multi-byte-patched, + # it emits different diagnostics: + # non-MB: invalid byte or field list + # MB: invalid byte, character or field list @@ -5186,7 +5222,7 @@ index 2e1906f..fe66012 100755 + push @new_t, $sub; + push @$t, $sub; + } -+ next if ($test_name =~ 'b-1'); ++ next if ($test_name =~ "nmerge-."); + push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; + } + push @Tests, @new; @@ -5197,120 +5233,43 @@ index 2e1906f..fe66012 100755 my $save_temps = $ENV{DEBUG}; my $verbose = $ENV{VERBOSE}; -diff --git a/tests/misc/uniq.pl b/tests/misc/uniq.pl -index aa163cd..91d617d 100755 ---- a/tests/misc/uniq.pl -+++ b/tests/misc/uniq.pl -@@ -23,9 +23,17 @@ my $limits = getlimits (); - my $prog = 'uniq'; - my $try = "Try '$prog --help' for more information.\n"; - -+my $inval = "$prog: invalid byte, character or field list\n$try"; -+ +diff --git a/tests/sort/sort.pl b/tests/sort/sort.pl +index 46f1d7a..bb38f5b 100755 +--- a/tests/sort/sort.pl ++++ b/tests/sort/sort.pl +@@ -24,10 +24,15 @@ my $prog = 'sort'; # Turn off localization of executable's output. @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; +-my $mb_locale = $ENV{LOCALE_FR_UTF8}; +my $mb_locale; +#Comment out next line to disable multibyte tests +$mb_locale = $ENV{LOCALE_FR_UTF8}; -+! defined $mb_locale || $mb_locale eq 'none' -+ and $mb_locale = 'C'; -+ - # When possible, create a "-z"-testing variant of each test. - sub add_z_variants($) - { -@@ -262,6 +270,53 @@ foreach my $t (@Tests) - and push @$t, {ENV=>'_POSIX2_VERSION=199209'}; - } - -+if ($mb_locale ne 'C') -+ { -+ # Duplicate each test vector, appending "-mb" to the test name and -+ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we -+ # provide coverage for the distro-added multi-byte code paths. -+ my @new; -+ foreach my $t (@Tests) -+ { -+ my @new_t = @$t; -+ my $test_name = shift @new_t; -+ -+ # Depending on whether uniq is multi-byte-patched, -+ # it emits different diagnostics: -+ # non-MB: invalid byte or field list -+ # MB: invalid byte, character or field list -+ # Adjust the expected error output accordingly. -+ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} -+ (@new_t)) -+ { -+ my $sub = {ERR_SUBST => 's/, character//'}; -+ push @new_t, $sub; -+ push @$t, $sub; -+ } -+ # In test #145, replace the each ‘...’ by '...'. -+ if ($test_name =~ "145") -+ { -+ my $sub = { ERR_SUBST => "s/‘([^’]+)’/'\$1'/g"}; -+ push @new_t, $sub; -+ push @$t, $sub; -+ } -+ next if ( $test_name =~ "schar" -+ or $test_name =~ "^obs-plus" -+ or $test_name =~ "119"); -+ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; -+ } -+ push @Tests, @new; -+ } -+ -+# Remember that triple_test creates from each test with exactly one "IN" -+# file two more tests (.p and .r suffix on name) corresponding to reading -+# input from a file and from a pipe. The pipe-reading test would fail -+# due to a race condition about 1 in 20 times. -+# Remove the IN_PIPE version of the "output-is-input" test above. -+# The others aren't susceptible because they have three inputs each. -+ -+@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; -+ - @Tests = add_z_variants \@Tests; - @Tests = triple_test \@Tests; - -diff --git a/tests/pr/pr-tests.pl b/tests/pr/pr-tests.pl -index 7ac6d4c..ae6cc35 100755 ---- a/tests/pr/pr-tests.pl -+++ b/tests/pr/pr-tests.pl -@@ -24,6 +24,15 @@ use strict; - my $prog = 'pr'; - my $normalize_strerror = "s/': .*/'/"; + ! defined $mb_locale || $mb_locale eq 'none' + and $mb_locale = 'C'; -+my $mb_locale; -+#Uncomment the following line to enable multibyte tests -+$mb_locale = $ENV{LOCALE_FR_UTF8}; -+! defined $mb_locale || $mb_locale eq 'none' -+ and $mb_locale = 'C'; -+ +my $try = "Try \`$prog --help' for more information.\n"; +my $inval = "$prog: invalid byte, character or field list\n$try"; + - my @tv = ( - - # -b option is no longer an official option. But it's still working to -@@ -512,8 +521,48 @@ push @Tests, - {IN=>"x\tx\tx\tx\tx\nx\tx\tx\tx\tx\n"}, - {OUT=>"x\tx\tx\tx\tx\tx\tx\tx\tx\tx\n"} ]; + # Since each test is run with a file name and with redirected stdin, + # the name in the diagnostic is either the file name or "-". + # Normalize each diagnostic to use '-'. +@@ -423,6 +428,38 @@ foreach my $t (@Tests) + } + } -+# Add _POSIX2_VERSION=199209 to the environment of each test -+# that uses an old-style option like +1. +if ($mb_locale ne 'C') -+ { ++ { + # Duplicate each test vector, appending "-mb" to the test name and + # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we + # provide coverage for the distro-added multi-byte code paths. + my @new; + foreach my $t (@Tests) -+ { ++ { + my @new_t = @$t; + my $test_name = shift @new_t; + -+ # Depending on whether pr is multi-byte-patched, ++ # Depending on whether sort is multi-byte-patched, + # it emits different diagnostics: + # non-MB: invalid byte or field list + # MB: invalid byte, character or field list @@ -5322,26 +5281,26 @@ index 7ac6d4c..ae6cc35 100755 + push @new_t, $sub; + push @$t, $sub; + } -+ #temporarily skip some failing tests -+ next if ($test_name =~ "col-0" or $test_name =~ "col-inval" or $test_name =~ "asan1"); ++ #disable several failing tests until investigation, disable all tests with envvars set ++ next if (grep {ref $_ eq 'HASH' && exists $_->{ENV}} (@new_t)); ++ next if ($test_name =~ "18g" or $test_name =~ "sort-numeric" or $test_name =~ "08[ab]" or $test_name =~ "03[def]" or $test_name =~ "h4" or $test_name =~ "n1" or $test_name =~ "2[01]a"); ++ next if ($test_name =~ "11[ab]"); # avoid FP: expected result differs to MB result due to collation rules. + push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; -+ } ++ } + push @Tests, @new; -+ } ++ } + @Tests = triple_test \@Tests; -+# Remember that triple_test creates from each test with exactly one "IN" -+# file two more tests (.p and .r suffix on name) corresponding to reading -+# input from a file and from a pipe. The pipe-reading test would fail -+# due to a race condition about 1 in 20 times. -+# Remove the IN_PIPE version of the "output-is-input" test above. -+# The others aren't susceptible because they have three inputs each. -+@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; -+ + # Remember that triple_test creates from each test with exactly one "IN" +@@ -432,6 +469,7 @@ foreach my $t (@Tests) + # Remove the IN_PIPE version of the "output-is-input" test above. + # The others aren't susceptible because they have three inputs each. + @Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; ++@Tests = grep {$_->[0] ne 'output-is-input-mb.p'} @Tests; + my $save_temps = $ENV{DEBUG}; my $verbose = $ENV{VERBOSE}; - diff --git a/tests/unexpand/mb.sh b/tests/unexpand/mb.sh new file mode 100755 index 0000000..8a82d74 @@ -5520,6 +5479,82 @@ index 0000000..8a82d74 + +LC_ALL=C unexpand in in > out || fail=1 +compare exp out > /dev/null 2>&1 || fail=1 +diff --git a/tests/uniq/uniq.pl b/tests/uniq/uniq.pl +index a6354dc..e43cd6e 100755 +--- a/tests/uniq/uniq.pl ++++ b/tests/uniq/uniq.pl +@@ -23,9 +23,17 @@ my $limits = getlimits (); + my $prog = 'uniq'; + my $try = "Try '$prog --help' for more information.\n"; + ++my $inval = "$prog: invalid byte, character or field list\n$try"; ++ + # Turn off localization of executable's output. + @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; + ++my $mb_locale; ++#Comment out next line to disable multibyte tests ++$mb_locale = $ENV{LOCALE_FR_UTF8}; ++! defined $mb_locale || $mb_locale eq 'none' ++ and $mb_locale = 'C'; ++ + # When possible, create a "-z"-testing variant of each test. + sub add_z_variants($) + { +@@ -262,6 +270,53 @@ foreach my $t (@Tests) + and push @$t, {ENV=>'_POSIX2_VERSION=199209'}; + } + ++if ($mb_locale ne 'C') ++ { ++ # Duplicate each test vector, appending "-mb" to the test name and ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we ++ # provide coverage for the distro-added multi-byte code paths. ++ my @new; ++ foreach my $t (@Tests) ++ { ++ my @new_t = @$t; ++ my $test_name = shift @new_t; ++ ++ # Depending on whether uniq is multi-byte-patched, ++ # it emits different diagnostics: ++ # non-MB: invalid byte or field list ++ # MB: invalid byte, character or field list ++ # Adjust the expected error output accordingly. ++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} ++ (@new_t)) ++ { ++ my $sub = {ERR_SUBST => 's/, character//'}; ++ push @new_t, $sub; ++ push @$t, $sub; ++ } ++ # In test #145, replace the each ‘...’ by '...'. ++ if ($test_name =~ "145") ++ { ++ my $sub = { ERR_SUBST => "s/‘([^’]+)’/'\$1'/g"}; ++ push @new_t, $sub; ++ push @$t, $sub; ++ } ++ next if ( $test_name =~ "schar" ++ or $test_name =~ "^obs-plus" ++ or $test_name =~ "119"); ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; ++ } ++ push @Tests, @new; ++ } ++ ++# Remember that triple_test creates from each test with exactly one "IN" ++# file two more tests (.p and .r suffix on name) corresponding to reading ++# input from a file and from a pipe. The pipe-reading test would fail ++# due to a race condition about 1 in 20 times. ++# Remove the IN_PIPE version of the "output-is-input" test above. ++# The others aren't susceptible because they have three inputs each. ++ ++@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; ++ + @Tests = add_z_variants \@Tests; + @Tests = triple_test \@Tests; + -- -2.34.1 +2.43.0 diff --git a/backport-pr-fix-infinite-loop-when-double-spacing.patch b/backport-pr-fix-infinite-loop-when-double-spacing.patch deleted file mode 100644 index 50f8829f5a77029604a92afe0568d75fe47e7b59..0000000000000000000000000000000000000000 --- a/backport-pr-fix-infinite-loop-when-double-spacing.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 3fb0cc80fa5e1cede9ec05303a70c26d0d23ca1d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?P=C3=A1draig=20Brady?= -Date: Tue, 25 Apr 2023 14:07:03 +0100 -Subject: [PATCH] pr: fix infinite loop when double spacing - -* src/pr.c (init_parameters): Ensure we avoid a 0 lines_per_body -which was possible when adjusting for double spacing. -That caused print_page() to always return true, -causing an infinite loop. -* tests/pr/pr-tests.pl: Add a test case. -* NEWS: Mention the fix. -Fixes https://bugs.debian.org/1034808 - -Conflict:NEWS context adaption -Reference:https://github.com/coreutils/coreutils/commit/3fb0cc80fa5e1cede9ec05303a70c26d0d23ca1d - ---- - NEWS | 3 +++ - src/pr.c | 2 +- - tests/pr/pr-tests.pl | 3 +++ - 3 files changed, 7 insertions(+), 1 deletion(-) - -diff --git a/NEWS b/NEWS -index f65eb95..5320b9c 100644 ---- a/NEWS -+++ b/NEWS -@@ -3,6 +3,9 @@ GNU coreutils NEWS -*- outline -*- - * Noteworthy changes in release 9.3 (2023-04-18) [stable] - - ** Bug fixes -+ -+ 'pr --length=1 --double-space' no longer enters an infinite loop. -+ [This bug was present in "the beginning".] - - cp --reflink=auto (the default), mv, and install - will again fall back to a standard copy in more cases. - Previously copies could fail with permission errors on -diff --git a/src/pr.c b/src/pr.c -index 2c5cdceb1..14a368b6c 100644 ---- a/src/pr.c -+++ b/src/pr.c -@@ -1209,7 +1209,7 @@ init_parameters (int number_of_files) - lines_per_body = lines_per_page; - - if (double_space) -- lines_per_body = lines_per_body / 2; -+ lines_per_body = MAX (1, lines_per_body / 2); - - /* If input is stdin, cannot print parallel files. BSD dumps core - on this. */ -diff --git a/tests/pr/pr-tests.pl b/tests/pr/pr-tests.pl -index 265e6e108..eafc13d81 100755 ---- a/tests/pr/pr-tests.pl -+++ b/tests/pr/pr-tests.pl -@@ -415,6 +415,9 @@ my @tv = ( - ['padding2', '-t -n,64', "1\n", (" "x 63)."1,1\n", 0], - # Ensure we handle buffer truncation correctly - ['padding3', '-t -N1000000 -n,1', "1\n", "0,1\n", 0], -+ -+# This entered an infinite loop before coreutils-9.4 -+['page-length1', '-dl1', "", "", 0], - ); - - # Convert the above old-style test vectors to the newer --- -2.36.1 - diff --git a/coreutils-9.3.tar.xz b/coreutils-9.4.tar.xz similarity index 43% rename from coreutils-9.3.tar.xz rename to coreutils-9.4.tar.xz index 25944bb718e53b58bd361fc50e2142fa837ee1b1..242acdf9e1c018c8a5a164c8f42ef7131b513b2f 100644 Binary files a/coreutils-9.3.tar.xz and b/coreutils-9.4.tar.xz differ diff --git a/coreutils.spec b/coreutils.spec index 671e9d49f30b97e4bcbf048bcf7a2ce5ab75f5eb..e7b0f31bca58bcde9b62ec2567ed0fe67d6a9b6f 100644 --- a/coreutils.spec +++ b/coreutils.spec @@ -1,6 +1,6 @@ Name: coreutils -Version: 9.3 -Release: 3 +Version: 9.4 +Release: 1 License: GPLv3+ Summary: A set of basic GNU tools commonly used in shell scripts Url: https://www.gnu.org/software/coreutils/ @@ -19,11 +19,10 @@ Patch3: bugfix-remove-usr-local-lib-from-m4.patch Patch4: bugfix-dummy_help2man.patch Patch6: skip-the-tests-that-require-selinux-if-selinux-is-di.patch Patch7: backport-config-color-alias-for-ls.patch -Patch8: backport-coreutils-i18n.patch -Patch9: backport-pr-fix-infinite-loop-when-double-spacing.patch +Patch8: backport-coreutils-df-direct.patch +Patch9: backport-coreutils-i18n.patch patch10: backport-CVE-2024-0684-split-do-not-shrink-hold-buffer.patch -Patch9000: openEuler-coreutils-df-direct.patch %ifarch sw_64 Patch9001: coreutils-9.0-sw.patch %endif @@ -155,6 +154,9 @@ fi %{_mandir}/man*/* %changelog +* Thu Feb 1 2024 lvgenggeng - 9.4-1 +- bump to 9.4 + * Wed Jan 24 2024 Jiangchuangang - 9.3-3 - fix CVE-2024-0684