diff --git a/0001-coreutils-8.31-i18n.patch b/0001-coreutils-8.31-i18n.patch deleted file mode 100644 index 4a78178351f6c3712aecb09eb17c6087008a0314..0000000000000000000000000000000000000000 --- a/0001-coreutils-8.31-i18n.patch +++ /dev/null @@ -1,5363 +0,0 @@ -From 3bc69048f43a78571269bc570ff0b229b13c012d Mon Sep 17 00:00:00 2001 -From: openEuler Buildteam -Date: Wed, 29 Jul 2020 11:35:56 +0800 -Subject: [PATCH] coreutils 8.31 i18n - ---- - bootstrap.conf | 1 + - configure.ac | 2 + - lib/linebuffer.h | 8 + - lib/mbfile.c | 3 + - lib/mbfile.h | 255 ++++++++++++ - m4/mbfile.m4 | 14 + - src/cut.c | 441 +++++++++++++++++++- - src/expand-common.c | 114 ++++++ - src/expand-common.h | 12 + - src/expand.c | 90 ++++- - src/fold.c | 307 ++++++++++++-- - src/join.c | 359 ++++++++++++++--- - src/local.mk | 4 +- - src/pr.c | 443 +++++++++++++++++++-- - src/sort.c | 772 ++++++++++++++++++++++++++++++++++-- - src/unexpand.c | 101 ++++- - src/uniq.c | 119 +++++- - tests/expand/mb.sh | 183 +++++++++ - tests/i18n/sort.sh | 29 ++ - tests/local.mk | 4 + - tests/misc/expand.pl | 42 ++ - tests/misc/fold.pl | 50 ++- - tests/misc/join.pl | 50 +++ - tests/misc/sort-mb-tests.sh | 45 +++ - tests/misc/sort-merge.pl | 42 ++ - tests/misc/sort.pl | 40 +- - tests/misc/unexpand.pl | 39 ++ - tests/misc/uniq.pl | 55 +++ - tests/pr/pr-tests.pl | 49 +++ - tests/unexpand/mb.sh | 172 ++++++++ - 30 files changed, 3633 insertions(+), 212 deletions(-) - create mode 100644 lib/mbfile.c - create mode 100644 lib/mbfile.h - create mode 100644 m4/mbfile.m4 - create mode 100755 tests/expand/mb.sh - create mode 100755 tests/i18n/sort.sh - create mode 100755 tests/misc/sort-mb-tests.sh - create mode 100755 tests/unexpand/mb.sh - -diff --git a/bootstrap.conf b/bootstrap.conf -index db49c97..2a342c1 100644 ---- a/bootstrap.conf -+++ b/bootstrap.conf -@@ -154,6 +154,7 @@ gnulib_modules=" - maintainer-makefile - malloc-gnu - manywarnings -+ mbfile - mbrlen - mbrtowc - mbsalign -diff --git a/configure.ac b/configure.ac -index c2ad08c..f5da816 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -446,6 +446,8 @@ fi - # I'm leaving it here for now. This whole thing needs to be modernized... - gl_WINSIZE_IN_PTEM - -+gl_MBFILE -+ - gl_HEADER_TIOCGWINSZ_IN_TERMIOS_H - - if test $gl_cv_sys_tiocgwinsz_needs_termios_h = no && \ -diff --git a/lib/linebuffer.h b/lib/linebuffer.h -index b19fea7..1f0a1a2 100644 ---- a/lib/linebuffer.h -+++ b/lib/linebuffer.h -@@ -21,6 +21,11 @@ - - # include - -+/* Get mbstate_t. */ -+# if HAVE_WCHAR_H -+# include -+# endif -+ - /* A 'struct linebuffer' holds a line of text. */ - - struct linebuffer -@@ -28,6 +33,9 @@ struct linebuffer - size_t size; /* Allocated. */ - size_t length; /* Used. */ - char *buffer; -+# if HAVE_WCHAR_H -+ mbstate_t state; -+# endif - }; - - /* Initialize linebuffer LINEBUFFER for use. */ -diff --git a/lib/mbfile.c b/lib/mbfile.c -new file mode 100644 -index 0000000..b0a468e ---- /dev/null -+++ b/lib/mbfile.c -@@ -0,0 +1,3 @@ -+#include -+#define MBFILE_INLINE _GL_EXTERN_INLINE -+#include "mbfile.h" -diff --git a/lib/mbfile.h b/lib/mbfile.h -new file mode 100644 -index 0000000..11f1b12 ---- /dev/null -+++ b/lib/mbfile.h -@@ -0,0 +1,255 @@ -+/* Multibyte character I/O: macros for multi-byte encodings. -+ Copyright (C) 2001, 2005, 2009-2015 Free Software Foundation, Inc. -+ -+ This program is free software: you can redistribute it and/or modify -+ it under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3 of the License, or -+ (at your option) any later version. -+ -+ This program is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ GNU General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with this program. If not, see . */ -+ -+/* Written by Mitsuru Chinen -+ and Bruno Haible . */ -+ -+/* The macros in this file implement multi-byte character input from a -+ stream. -+ -+ mb_file_t -+ is the type for multibyte character input stream, usable for variable -+ declarations. -+ -+ mbf_char_t -+ is the type for multibyte character or EOF, usable for variable -+ declarations. -+ -+ mbf_init (mbf, stream) -+ initializes the MB_FILE for reading from stream. -+ -+ mbf_getc (mbc, mbf) -+ reads the next multibyte character from mbf and stores it in mbc. -+ -+ mb_iseof (mbc) -+ returns true if mbc represents the EOF value. -+ -+ Here are the function prototypes of the macros. -+ -+ extern void mbf_init (mb_file_t mbf, FILE *stream); -+ extern void mbf_getc (mbf_char_t mbc, mb_file_t mbf); -+ extern bool mb_iseof (const mbf_char_t mbc); -+ */ -+ -+#ifndef _MBFILE_H -+#define _MBFILE_H 1 -+ -+#include -+#include -+#include -+#include -+ -+/* Tru64 with Desktop Toolkit C has a bug: must be included before -+ . -+ BSD/OS 4.1 has a bug: and must be included before -+ . */ -+#include -+#include -+#include -+ -+#include "mbchar.h" -+ -+#ifndef _GL_INLINE_HEADER_BEGIN -+ #error "Please include config.h first." -+#endif -+_GL_INLINE_HEADER_BEGIN -+#ifndef MBFILE_INLINE -+# define MBFILE_INLINE _GL_INLINE -+#endif -+ -+struct mbfile_multi { -+ FILE *fp; -+ bool eof_seen; -+ bool have_pushback; -+ mbstate_t state; -+ unsigned int bufcount; -+ char buf[MBCHAR_BUF_SIZE]; -+ struct mbchar pushback; -+}; -+ -+MBFILE_INLINE void -+mbfile_multi_getc (struct mbchar *mbc, struct mbfile_multi *mbf) -+{ -+ size_t bytes; -+ -+ /* If EOF has already been seen, don't use getc. This matters if -+ mbf->fp is connected to an interactive tty. */ -+ if (mbf->eof_seen) -+ goto eof; -+ -+ /* Return character pushed back, if there is one. */ -+ if (mbf->have_pushback) -+ { -+ mb_copy (mbc, &mbf->pushback); -+ mbf->have_pushback = false; -+ return; -+ } -+ -+ /* Before using mbrtowc, we need at least one byte. */ -+ if (mbf->bufcount == 0) -+ { -+ int c = getc (mbf->fp); -+ if (c == EOF) -+ { -+ mbf->eof_seen = true; -+ goto eof; -+ } -+ mbf->buf[0] = (unsigned char) c; -+ mbf->bufcount++; -+ } -+ -+ /* Handle most ASCII characters quickly, without calling mbrtowc(). */ -+ if (mbf->bufcount == 1 && mbsinit (&mbf->state) && is_basic (mbf->buf[0])) -+ { -+ /* These characters are part of the basic character set. ISO C 99 -+ guarantees that their wide character code is identical to their -+ char code. */ -+ mbc->wc = mbc->buf[0] = mbf->buf[0]; -+ mbc->wc_valid = true; -+ mbc->ptr = &mbc->buf[0]; -+ mbc->bytes = 1; -+ mbf->bufcount = 0; -+ return; -+ } -+ -+ /* Use mbrtowc on an increasing number of bytes. Read only as many bytes -+ from mbf->fp as needed. This is needed to give reasonable interactive -+ behaviour when mbf->fp is connected to an interactive tty. */ -+ for (;;) -+ { -+ /* We don't know whether the 'mbrtowc' function updates the state when -+ it returns -2, - this is the ISO C 99 and glibc-2.2 behaviour - or -+ not - amended ANSI C, glibc-2.1 and Solaris 2.7 behaviour. We -+ don't have an autoconf test for this, yet. -+ The new behaviour would allow us to feed the bytes one by one into -+ mbrtowc. But the old behaviour forces us to feed all bytes since -+ the end of the last character into mbrtowc. Since we want to retry -+ with more bytes when mbrtowc returns -2, we must backup the state -+ before calling mbrtowc, because implementations with the new -+ behaviour will clobber it. */ -+ mbstate_t backup_state = mbf->state; -+ -+ bytes = mbrtowc (&mbc->wc, &mbf->buf[0], mbf->bufcount, &mbf->state); -+ -+ if (bytes == (size_t) -1) -+ { -+ /* An invalid multibyte sequence was encountered. */ -+ /* Return a single byte. */ -+ bytes = 1; -+ mbc->wc_valid = false; -+ break; -+ } -+ else if (bytes == (size_t) -2) -+ { -+ /* An incomplete multibyte character. */ -+ mbf->state = backup_state; -+ if (mbf->bufcount == MBCHAR_BUF_SIZE) -+ { -+ /* An overlong incomplete multibyte sequence was encountered. */ -+ /* Return a single byte. */ -+ bytes = 1; -+ mbc->wc_valid = false; -+ break; -+ } -+ else -+ { -+ /* Read one more byte and retry mbrtowc. */ -+ int c = getc (mbf->fp); -+ if (c == EOF) -+ { -+ /* An incomplete multibyte character at the end. */ -+ mbf->eof_seen = true; -+ bytes = mbf->bufcount; -+ mbc->wc_valid = false; -+ break; -+ } -+ mbf->buf[mbf->bufcount] = (unsigned char) c; -+ mbf->bufcount++; -+ } -+ } -+ else -+ { -+ if (bytes == 0) -+ { -+ /* A null wide character was encountered. */ -+ bytes = 1; -+ assert (mbf->buf[0] == '\0'); -+ assert (mbc->wc == 0); -+ } -+ mbc->wc_valid = true; -+ break; -+ } -+ } -+ -+ /* Return the multibyte sequence mbf->buf[0..bytes-1]. */ -+ mbc->ptr = &mbc->buf[0]; -+ memcpy (&mbc->buf[0], &mbf->buf[0], bytes); -+ mbc->bytes = bytes; -+ -+ mbf->bufcount -= bytes; -+ if (mbf->bufcount > 0) -+ { -+ /* It's not worth calling memmove() for so few bytes. */ -+ unsigned int count = mbf->bufcount; -+ char *p = &mbf->buf[0]; -+ -+ do -+ { -+ *p = *(p + bytes); -+ p++; -+ } -+ while (--count > 0); -+ } -+ return; -+ -+eof: -+ /* An mbchar_t with bytes == 0 is used to indicate EOF. */ -+ mbc->ptr = NULL; -+ mbc->bytes = 0; -+ mbc->wc_valid = false; -+ return; -+} -+ -+MBFILE_INLINE void -+mbfile_multi_ungetc (const struct mbchar *mbc, struct mbfile_multi *mbf) -+{ -+ mb_copy (&mbf->pushback, mbc); -+ mbf->have_pushback = true; -+} -+ -+typedef struct mbfile_multi mb_file_t; -+ -+typedef mbchar_t mbf_char_t; -+ -+#define mbf_init(mbf, stream) \ -+ ((mbf).fp = (stream), \ -+ (mbf).eof_seen = false, \ -+ (mbf).have_pushback = false, \ -+ memset (&(mbf).state, '\0', sizeof (mbstate_t)), \ -+ (mbf).bufcount = 0) -+ -+#define mbf_getc(mbc, mbf) mbfile_multi_getc (&(mbc), &(mbf)) -+ -+#define mbf_ungetc(mbc, mbf) mbfile_multi_ungetc (&(mbc), &(mbf)) -+ -+#define mb_iseof(mbc) ((mbc).bytes == 0) -+ -+#ifndef _GL_INLINE_HEADER_BEGIN -+ #error "Please include config.h first." -+#endif -+_GL_INLINE_HEADER_BEGIN -+ -+#endif /* _MBFILE_H */ -diff --git a/m4/mbfile.m4 b/m4/mbfile.m4 -new file mode 100644 -index 0000000..8589902 ---- /dev/null -+++ b/m4/mbfile.m4 -@@ -0,0 +1,14 @@ -+# mbfile.m4 serial 7 -+dnl Copyright (C) 2005, 2008-2015 Free Software Foundation, Inc. -+dnl This file is free software; the Free Software Foundation -+dnl gives unlimited permission to copy and/or distribute it, -+dnl with or without modifications, as long as this notice is preserved. -+ -+dnl autoconf tests required for use of mbfile.h -+dnl From Bruno Haible. -+ -+AC_DEFUN([gl_MBFILE], -+[ -+ AC_REQUIRE([AC_TYPE_MBSTATE_T]) -+ : -+]) -diff --git a/src/cut.c b/src/cut.c -index 0f6ba60..35ab5fc 100644 ---- a/src/cut.c -+++ b/src/cut.c -@@ -28,6 +28,11 @@ - #include - #include - #include -+ -+/* Get mbstate_t, mbrtowc(). */ -+#if HAVE_WCHAR_H -+# include -+#endif - #include "system.h" - - #include "error.h" -@@ -38,6 +43,18 @@ - - #include "set-fields.h" - -+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC -+ installation; work around this configuration error. */ -+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 -+# undef MB_LEN_MAX -+# define MB_LEN_MAX 16 -+#endif -+ -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif -+ - /* The official name of this program (e.g., no 'g' prefix). */ - #define PROGRAM_NAME "cut" - -@@ -54,6 +71,52 @@ - } \ - while (0) - -+/* Refill the buffer BUF to get a multibyte character. */ -+#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \ -+ do \ -+ { \ -+ if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \ -+ { \ -+ memmove (BUF, BUFPOS, BUFLEN); \ -+ BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \ -+ BUFPOS = BUF; \ -+ } \ -+ } \ -+ while (0) -+ -+/* Get wide character on BUFPOS. BUFPOS is not included after that. -+ If byte sequence is not valid as a character, CONVFAIL is true. Otherwise false. */ -+#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \ -+ do \ -+ { \ -+ mbstate_t state_bak; \ -+ \ -+ if (BUFLEN < 1) \ -+ { \ -+ WC = WEOF; \ -+ break; \ -+ } \ -+ \ -+ /* Get a wide character. */ \ -+ CONVFAIL = false; \ -+ state_bak = STATE; \ -+ MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \ -+ \ -+ switch (MBLENGTH) \ -+ { \ -+ case (size_t)-1: \ -+ case (size_t)-2: \ -+ CONVFAIL = true; \ -+ STATE = state_bak; \ -+ /* Fall througn. */ \ -+ \ -+ case 0: \ -+ MBLENGTH = 1; \ -+ break; \ -+ } \ -+ } \ -+ while (0) -+ - - /* Pointer inside RP. When checking if a byte or field is selected - by a finite range, we check if it is between CURRENT_RP.LO -@@ -61,6 +124,9 @@ - CURRENT_RP.HI then we make CURRENT_RP to point to the next range pair. */ - static struct field_range_pair *current_rp; - -+/* Length of the delimiter given as argument to -d. */ -+size_t delimlen; -+ - /* This buffer is used to support the semantics of the -s option - (or lack of same) when the specified field list includes (does - not include) the first field. In both of those cases, the entire -@@ -77,15 +143,25 @@ enum operating_mode - { - undefined_mode, - -- /* Output characters that are in the given bytes. */ -+ /* Output bytes that are at the given positions. */ - byte_mode, - -+ /* Output characters that are at the given positions. */ -+ character_mode, -+ - /* Output the given delimiter-separated fields. */ - field_mode - }; - - static enum operating_mode operating_mode; - -+/* If nonzero, when in byte mode, don't split multibyte characters. */ -+static int byte_mode_character_aware; -+ -+/* If nonzero, the function for single byte locale is work -+ if this program runs on multibyte locale. */ -+static int force_singlebyte_mode; -+ - /* If true do not output lines containing no delimiter characters. - Otherwise, all such lines are printed. This option is valid only - with field mode. */ -@@ -97,6 +173,9 @@ static bool complement; - - /* The delimiter character for field mode. */ - static unsigned char delim; -+#if HAVE_WCHAR_H -+static wchar_t wcdelim; -+#endif - - /* The delimiter for each line/record. */ - static unsigned char line_delim = '\n'; -@@ -164,7 +243,7 @@ Print selected parts of lines from each FILE to standard output.\n\ - -f, --fields=LIST select only these fields; also print any line\n\ - that contains no delimiter character, unless\n\ - the -s option is specified\n\ -- -n (ignored)\n\ -+ -n with -b: don't split multibyte characters\n\ - "), stdout); - fputs (_("\ - --complement complement the set of selected bytes, characters\n\ -@@ -280,6 +359,82 @@ cut_bytes (FILE *stream) - } - } - -+#if HAVE_MBRTOWC -+/* This function is in use for the following case. -+ -+ 1. Read from the stream STREAM, printing to standard output any selected -+ characters. -+ -+ 2. Read from stream STREAM, printing to standard output any selected bytes, -+ without splitting multibyte characters. */ -+ -+static void -+cut_characters_or_cut_bytes_no_split (FILE *stream) -+{ -+ uintmax_t idx; /* number of bytes or characters in the line so far. */ -+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ -+ char *bufpos; /* Next read position of BUF. */ -+ size_t buflen; /* The length of the byte sequence in buf. */ -+ wint_t wc; /* A gotten wide character. */ -+ size_t mblength; /* The byte size of a multibyte character which shows -+ as same character as WC. */ -+ mbstate_t state; /* State of the stream. */ -+ bool convfail = false; /* true, when conversion failed. Otherwise false. */ -+ /* Whether to begin printing delimiters between ranges for the current line. -+ Set after we've begun printing data corresponding to the first range. */ -+ bool print_delimiter = false; -+ -+ idx = 0; -+ buflen = 0; -+ bufpos = buf; -+ memset (&state, '\0', sizeof(mbstate_t)); -+ -+ current_rp = frp; -+ -+ while (1) -+ { -+ REFILL_BUFFER (buf, bufpos, buflen, stream); -+ -+ GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail); -+ (void) convfail; /* ignore unused */ -+ -+ if (wc == WEOF) -+ { -+ if (idx > 0) -+ putchar (line_delim); -+ break; -+ } -+ else if (wc == line_delim) -+ { -+ putchar (line_delim); -+ idx = 0; -+ print_delimiter = false; -+ current_rp = frp; -+ } -+ else -+ { -+ next_item (&idx); -+ if (print_kth (idx)) -+ { -+ if (output_delimiter_specified) -+ { -+ if (print_delimiter && is_range_start_index (idx)) -+ { -+ fwrite (output_delimiter_string, sizeof (char), -+ output_delimiter_length, stdout); -+ } -+ print_delimiter = true; -+ } -+ fwrite (bufpos, mblength, sizeof(char), stdout); -+ } -+ } -+ -+ buflen -= mblength; -+ bufpos += mblength; -+ } -+} -+#endif -+ - /* Read from stream STREAM, printing to standard output any selected fields. */ - - static void -@@ -425,13 +580,211 @@ cut_fields (FILE *stream) - } - } - -+#if HAVE_MBRTOWC -+static void -+cut_fields_mb (FILE *stream) -+{ -+ int c; -+ uintmax_t field_idx; -+ int found_any_selected_field; -+ int buffer_first_field; -+ int empty_input; -+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ -+ char *bufpos; /* Next read position of BUF. */ -+ size_t buflen; /* The length of the byte sequence in buf. */ -+ wint_t wc = 0; /* A gotten wide character. */ -+ size_t mblength; /* The byte size of a multibyte character which shows -+ as same character as WC. */ -+ mbstate_t state; /* State of the stream. */ -+ bool convfail = false; /* true, when conversion failed. Otherwise false. */ -+ -+ current_rp = frp; -+ -+ found_any_selected_field = 0; -+ field_idx = 1; -+ bufpos = buf; -+ buflen = 0; -+ memset (&state, '\0', sizeof(mbstate_t)); -+ -+ c = getc (stream); -+ empty_input = (c == EOF); -+ if (c != EOF) -+ { -+ ungetc (c, stream); -+ wc = 0; -+ } -+ else -+ wc = WEOF; -+ -+ /* To support the semantics of the -s flag, we may have to buffer -+ all of the first field to determine whether it is `delimited.' -+ But that is unnecessary if all non-delimited lines must be printed -+ and the first field has been selected, or if non-delimited lines -+ must be suppressed and the first field has *not* been selected. -+ That is because a non-delimited line has exactly one field. */ -+ buffer_first_field = (suppress_non_delimited ^ !print_kth (1)); -+ -+ while (1) -+ { -+ if (field_idx == 1 && buffer_first_field) -+ { -+ int len = 0; -+ -+ while (1) -+ { -+ REFILL_BUFFER (buf, bufpos, buflen, stream); -+ -+ GET_NEXT_WC_FROM_BUFFER -+ (wc, bufpos, buflen, mblength, state, convfail); -+ -+ if (wc == WEOF) -+ break; -+ -+ field_1_buffer = xrealloc (field_1_buffer, len + mblength); -+ memcpy (field_1_buffer + len, bufpos, mblength); -+ len += mblength; -+ buflen -= mblength; -+ bufpos += mblength; -+ -+ if (!convfail && (wc == line_delim || wc == wcdelim)) -+ break; -+ } -+ -+ if (len <= 0 && wc == WEOF) -+ break; -+ -+ /* If the first field extends to the end of line (it is not -+ delimited) and we are printing all non-delimited lines, -+ print this one. */ -+ if (convfail || (!convfail && wc != wcdelim)) -+ { -+ if (suppress_non_delimited) -+ { -+ /* Empty. */ -+ } -+ else -+ { -+ fwrite (field_1_buffer, sizeof (char), len, stdout); -+ /* Make sure the output line is newline terminated. */ -+ if (convfail || (!convfail && wc != line_delim)) -+ putchar (line_delim); -+ } -+ continue; -+ } -+ -+ if (print_kth (1)) -+ { -+ /* Print the field, but not the trailing delimiter. */ -+ fwrite (field_1_buffer, sizeof (char), len - 1, stdout); -+ found_any_selected_field = 1; -+ } -+ next_item (&field_idx); -+ } -+ -+ if (wc != WEOF) -+ { -+ if (print_kth (field_idx)) -+ { -+ if (found_any_selected_field) -+ { -+ fwrite (output_delimiter_string, sizeof (char), -+ output_delimiter_length, stdout); -+ } -+ found_any_selected_field = 1; -+ } -+ -+ while (1) -+ { -+ REFILL_BUFFER (buf, bufpos, buflen, stream); -+ -+ GET_NEXT_WC_FROM_BUFFER -+ (wc, bufpos, buflen, mblength, state, convfail); -+ -+ if (wc == WEOF) -+ break; -+ else if (!convfail && (wc == wcdelim || wc == line_delim)) -+ { -+ buflen -= mblength; -+ bufpos += mblength; -+ break; -+ } -+ -+ if (print_kth (field_idx)) -+ fwrite (bufpos, mblength, sizeof(char), stdout); -+ -+ buflen -= mblength; -+ bufpos += mblength; -+ } -+ } -+ -+ if ((!convfail || wc == line_delim) && buflen < 1) -+ wc = WEOF; -+ -+ if (!convfail && wc == wcdelim) -+ next_item (&field_idx); -+ else if (wc == WEOF || (!convfail && wc == line_delim)) -+ { -+ if (found_any_selected_field -+ || (!empty_input && !(suppress_non_delimited && field_idx == 1))) -+ putchar (line_delim); -+ if (wc == WEOF) -+ break; -+ field_idx = 1; -+ current_rp = frp; -+ found_any_selected_field = 0; -+ } -+ } -+} -+#endif -+ - static void - cut_stream (FILE *stream) - { -- if (operating_mode == byte_mode) -- cut_bytes (stream); -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode) -+ { -+ switch (operating_mode) -+ { -+ case byte_mode: -+ if (byte_mode_character_aware) -+ cut_characters_or_cut_bytes_no_split (stream); -+ else -+ cut_bytes (stream); -+ break; -+ -+ case character_mode: -+ cut_characters_or_cut_bytes_no_split (stream); -+ break; -+ -+ case field_mode: -+ if (delimlen == 1) -+ { -+ /* Check if we have utf8 multibyte locale, so we can use this -+ optimization because of uniqueness of characters, which is -+ not true for e.g. SJIS */ -+ char * loc = setlocale(LC_CTYPE, NULL); -+ if (loc && (strstr (loc, "UTF-8") || strstr (loc, "utf-8") || -+ strstr (loc, "UTF8") || strstr (loc, "utf8"))) -+ { -+ cut_fields (stream); -+ break; -+ } -+ } -+ cut_fields_mb (stream); -+ break; -+ -+ default: -+ abort (); -+ } -+ } - else -- cut_fields (stream); -+#endif -+ { -+ if (operating_mode == field_mode) -+ cut_fields (stream); -+ else -+ cut_bytes (stream); -+ } - } - - /* Process file FILE to standard output. -@@ -483,6 +836,7 @@ main (int argc, char **argv) - bool ok; - bool delim_specified = false; - char *spec_list_string IF_LINT ( = NULL); -+ char mbdelim[MB_LEN_MAX + 1]; - - initialize_main (&argc, &argv); - set_program_name (argv[0]); -@@ -505,7 +859,6 @@ main (int argc, char **argv) - switch (optc) - { - case 'b': -- case 'c': - /* Build the byte list. */ - if (operating_mode != undefined_mode) - FATAL_ERROR (_("only one type of list may be specified")); -@@ -513,6 +866,14 @@ main (int argc, char **argv) - spec_list_string = optarg; - break; - -+ case 'c': -+ /* Build the character list. */ -+ if (operating_mode != undefined_mode) -+ FATAL_ERROR (_("only one type of list may be specified")); -+ operating_mode = character_mode; -+ spec_list_string = optarg; -+ break; -+ - case 'f': - /* Build the field list. */ - if (operating_mode != undefined_mode) -@@ -524,10 +885,38 @@ main (int argc, char **argv) - case 'd': - /* New delimiter. */ - /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */ -- if (optarg[0] != '\0' && optarg[1] != '\0') -- FATAL_ERROR (_("the delimiter must be a single character")); -- delim = optarg[0]; -- delim_specified = true; -+ { -+#if HAVE_MBRTOWC -+ if(MB_CUR_MAX > 1) -+ { -+ mbstate_t state; -+ -+ memset (&state, '\0', sizeof(mbstate_t)); -+ delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state); -+ -+ if (delimlen == (size_t)-1 || delimlen == (size_t)-2) -+ ++force_singlebyte_mode; -+ else -+ { -+ delimlen = (delimlen < 1) ? 1 : delimlen; -+ if (wcdelim != L'\0' && *(optarg + delimlen) != '\0') -+ FATAL_ERROR (_("the delimiter must be a single character")); -+ memcpy (mbdelim, optarg, delimlen); -+ mbdelim[delimlen] = '\0'; -+ if (delimlen == 1) -+ delim = *optarg; -+ } -+ } -+ -+ if (MB_CUR_MAX <= 1 || force_singlebyte_mode) -+#endif -+ { -+ if (optarg[0] != '\0' && optarg[1] != '\0') -+ FATAL_ERROR (_("the delimiter must be a single character")); -+ delim = (unsigned char) optarg[0]; -+ } -+ delim_specified = true; -+ } - break; - - case OUTPUT_DELIMITER_OPTION: -@@ -540,6 +929,7 @@ main (int argc, char **argv) - break; - - case 'n': -+ byte_mode_character_aware = 1; - break; - - case 's': -@@ -579,15 +969,34 @@ main (int argc, char **argv) - | (complement ? SETFLD_COMPLEMENT : 0) ); - - if (!delim_specified) -- delim = '\t'; -+ { -+ delim = '\t'; -+#ifdef HAVE_MBRTOWC -+ wcdelim = L'\t'; -+ mbdelim[0] = '\t'; -+ mbdelim[1] = '\0'; -+ delimlen = 1; -+#endif -+ } - - if (output_delimiter_string == NULL) - { -- static char dummy[2]; -- dummy[0] = delim; -- dummy[1] = '\0'; -- output_delimiter_string = dummy; -- output_delimiter_length = 1; -+#ifdef HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode) -+ { -+ output_delimiter_string = xstrdup(mbdelim); -+ output_delimiter_length = delimlen; -+ } -+ -+ if (MB_CUR_MAX <= 1 || force_singlebyte_mode) -+#endif -+ { -+ static char dummy[2]; -+ dummy[0] = delim; -+ dummy[1] = '\0'; -+ output_delimiter_string = dummy; -+ output_delimiter_length = 1; -+ } - } - - if (optind == argc) -diff --git a/src/expand-common.c b/src/expand-common.c -index e4209a0..76be4d8 100644 ---- a/src/expand-common.c -+++ b/src/expand-common.c -@@ -19,6 +19,7 @@ - #include - #include - #include -+#include - #include "system.h" - #include "die.h" - #include "error.h" -@@ -126,6 +127,119 @@ set_increment_size (uintmax_t tabval) - return ok; - } - -+extern int -+set_utf_locale (void) -+{ -+ /*try using some predefined locale */ -+ const char* predef_locales[] = {"C.UTF8","en_US.UTF8","en_GB.UTF8"}; -+ -+ const int predef_locales_count=3; -+ for (int i=0;ibufcount=0; -+ if (c == 0xEF) -+ { -+ c=fgetc(fp); -+ } -+ else -+ { -+ if (c != EOF) -+ { -+ ungetc(c,fp); -+ } -+ return false; -+ } -+ -+ if (c == 0xBB) -+ { -+ c=fgetc(fp); -+ } -+ else -+ { -+ if ( c!= EOF ) -+ { -+ mbf->buf[0]=(unsigned char) 0xEF; -+ mbf->bufcount=1; -+ ungetc(c,fp); -+ return false; -+ } -+ else -+ { -+ ungetc(0xEF,fp); -+ return false; -+ } -+ } -+ if (c == 0xBF) -+ { -+ mbf->bufcount=0; -+ return true; -+ } -+ else -+ { -+ if (c != EOF) -+ { -+ mbf->buf[0]=(unsigned char) 0xEF; -+ mbf->buf[1]=(unsigned char) 0xBB; -+ mbf->bufcount=2; -+ ungetc(c,fp); -+ return false; -+ } -+ else -+ { -+ mbf->buf[0]=(unsigned char) 0xEF; -+ mbf->bufcount=1; -+ ungetc(0xBB,fp); -+ return false; -+ } -+ } -+ return false; -+} -+ -+extern void -+print_bom(void) -+{ -+ putc (0xEF, stdout); -+ putc (0xBB, stdout); -+ putc (0xBF, stdout); -+} -+ - /* Add the comma or blank separated list of tab stops STOPS - to the list of tab stops. */ - extern void -diff --git a/src/expand-common.h b/src/expand-common.h -index f304fbb..eaa2f15 100644 ---- a/src/expand-common.h -+++ b/src/expand-common.h -@@ -34,6 +34,18 @@ extern size_t max_column_width; - /* The desired exit status. */ - extern int exit_status; - -+extern int -+set_utf_locale (void); -+ -+extern bool -+check_utf_locale(void); -+ -+extern bool -+check_bom(FILE* fp, mb_file_t *mbf); -+ -+extern void -+print_bom(void); -+ - /* Add tab stop TABVAL to the end of 'tab_list'. */ - extern void - add_tab_stop (uintmax_t tabval); -diff --git a/src/expand.c b/src/expand.c -index 3412d7b..bf61aff 100644 ---- a/src/expand.c -+++ b/src/expand.c -@@ -37,6 +37,9 @@ - #include - #include - #include -+ -+#include -+ - #include "system.h" - #include "die.h" - #include "xstrndup.h" -@@ -98,19 +101,41 @@ expand (void) - { - /* Input stream. */ - FILE *fp = next_file (NULL); -+ mb_file_t mbf; -+ mbf_char_t c; -+ /* True if the starting locale is utf8. */ -+ bool using_utf_locale; -+ -+ /* True if the first file contains BOM header. */ -+ bool found_bom; -+ using_utf_locale=check_utf_locale(); - - if (!fp) - return; -+ mbf_init (mbf, fp); -+ found_bom=check_bom(fp,&mbf); - -- while (true) -+ if (using_utf_locale == false && found_bom == true) -+ { -+ /*try using some predefined locale */ -+ -+ if (set_utf_locale () != 0) - { -- /* Input character, or EOF. */ -- int c; -+ error (EXIT_FAILURE, errno, _("cannot set UTF-8 locale")); -+ } -+ } -+ -+ -+ if (found_bom == true) -+ { -+ print_bom(); -+ } - -+ while (true) -+ { - /* If true, perform translations. */ - bool convert = true; - -- - /* The following variables have valid values only when CONVERT - is true: */ - -@@ -120,17 +145,48 @@ expand (void) - /* Index in TAB_LIST of next tab stop to examine. */ - size_t tab_index = 0; - -- - /* Convert a line of text. */ - - do - { -- while ((c = getc (fp)) < 0 && (fp = next_file (fp))) -- continue; -+ while (true) { -+ mbf_getc (c, mbf); -+ if ((mb_iseof (c)) && (fp = next_file (fp))) -+ { -+ mbf_init (mbf, fp); -+ if (fp!=NULL) -+ { -+ if (check_bom(fp,&mbf)==true) -+ { -+ /*Not the first file - check BOM header*/ -+ if (using_utf_locale==false && found_bom==false) -+ { -+ /*BOM header in subsequent file but not in the first one. */ -+ error (EXIT_FAILURE, errno, _("combination of files with and without BOM header")); -+ } -+ } -+ else -+ { -+ if(using_utf_locale==false && found_bom==true) -+ { -+ /*First file conatined BOM header - locale was switched to UTF -+ *all subsequent files should contain BOM. */ -+ error (EXIT_FAILURE, errno, _("combination of files with and without BOM header")); -+ } -+ } -+ } -+ continue; -+ } -+ else -+ { -+ break; -+ } -+ } -+ - - if (convert) - { -- if (c == '\t') -+ if (mb_iseq (c, '\t')) - { - /* Column the next input tab stop is on. */ - uintmax_t next_tab_column; -@@ -149,32 +205,34 @@ expand (void) - if (putchar (' ') < 0) - die (EXIT_FAILURE, errno, _("write error")); - -- c = ' '; -+ mb_setascii (&c, ' '); - } -- else if (c == '\b') -+ else if (mb_iseq (c, '\b')) - { - /* Go back one column, and force recalculation of the - next tab stop. */ - column -= !!column; - tab_index -= !!tab_index; - } -- else -+ /* A leading control character could make us trip over. */ -+ else if (!mb_iscntrl (c)) - { -- column++; -+ column += mb_width (c); - if (!column) - die (EXIT_FAILURE, 0, _("input line is too long")); - } - -- convert &= convert_entire_line || !! isblank (c); -+ convert &= convert_entire_line || mb_isblank (c); - } - -- if (c < 0) -+ if (mb_iseof (c)) - return; - -- if (putchar (c) < 0) -+ mb_putc (c, stdout); -+ if (ferror (stdout)) - die (EXIT_FAILURE, errno, _("write error")); - } -- while (c != '\n'); -+ while (!mb_iseq (c, '\n')); - } - } - -diff --git a/src/fold.c b/src/fold.c -index 2221f72..917c652 100644 ---- a/src/fold.c -+++ b/src/fold.c -@@ -22,12 +22,34 @@ - #include - #include - -+/* Get mbstate_t, mbrtowc(), wcwidth(). */ -+#if HAVE_WCHAR_H -+# include -+#endif -+ -+/* Get iswprint(), iswblank(), wcwidth(). */ -+#if HAVE_WCTYPE_H -+# include -+#endif -+ - #include "system.h" - #include "die.h" - #include "error.h" - #include "fadvise.h" - #include "xdectoint.h" - -+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC -+ installation; work around this configuration error. */ -+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 -+# undef MB_LEN_MAX -+# define MB_LEN_MAX 16 -+#endif -+ -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif -+ - #define TAB_WIDTH 8 - - /* The official name of this program (e.g., no 'g' prefix). */ -@@ -35,20 +57,41 @@ - - #define AUTHORS proper_name ("David MacKenzie") - -+#define FATAL_ERROR(Message) \ -+ do \ -+ { \ -+ error (0, 0, (Message)); \ -+ usage (2); \ -+ } \ -+ while (0) -+ -+enum operating_mode -+{ -+ /* Fold texts by columns that are at the given positions. */ -+ column_mode, -+ -+ /* Fold texts by bytes that are at the given positions. */ -+ byte_mode, -+ -+ /* Fold texts by characters that are at the given positions. */ -+ character_mode, -+}; -+ -+/* The argument shows current mode. (Default: column_mode) */ -+static enum operating_mode operating_mode; -+ - /* If nonzero, try to break on whitespace. */ - static bool break_spaces; - --/* If nonzero, count bytes, not column positions. */ --static bool count_bytes; -- - /* If nonzero, at least one of the files we read was standard input. */ - static bool have_read_stdin; - --static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::"; -+static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::"; - - static struct option const longopts[] = - { - {"bytes", no_argument, NULL, 'b'}, -+ {"characters", no_argument, NULL, 'c'}, - {"spaces", no_argument, NULL, 's'}, - {"width", required_argument, NULL, 'w'}, - {GETOPT_HELP_OPTION_DECL}, -@@ -76,6 +119,7 @@ Wrap input lines in each FILE, writing to standard output.\n\ - - fputs (_("\ - -b, --bytes count bytes rather than columns\n\ -+ -c, --characters count characters rather than columns\n\ - -s, --spaces break at spaces\n\ - -w, --width=WIDTH use WIDTH columns instead of 80\n\ - "), stdout); -@@ -93,7 +137,7 @@ Wrap input lines in each FILE, writing to standard output.\n\ - static size_t - adjust_column (size_t column, char c) - { -- if (!count_bytes) -+ if (operating_mode != byte_mode) - { - if (c == '\b') - { -@@ -116,30 +160,14 @@ adjust_column (size_t column, char c) - to stdout, with maximum line length WIDTH. - Return true if successful. */ - --static bool --fold_file (char const *filename, size_t width) -+static void -+fold_text (FILE *istream, size_t width, int *saved_errno) - { -- FILE *istream; - int c; - size_t column = 0; /* Screen column where next char will go. */ - size_t offset_out = 0; /* Index in 'line_out' for next char. */ - static char *line_out = NULL; - static size_t allocated_out = 0; -- int saved_errno; -- -- if (STREQ (filename, "-")) -- { -- istream = stdin; -- have_read_stdin = true; -- } -- else -- istream = fopen (filename, "r"); -- -- if (istream == NULL) -- { -- error (0, errno, "%s", quotef (filename)); -- return false; -- } - - fadvise (istream, FADVISE_SEQUENTIAL); - -@@ -169,6 +197,15 @@ fold_file (char const *filename, size_t width) - bool found_blank = false; - size_t logical_end = offset_out; - -+ /* If LINE_OUT has no wide character, -+ put a new wide character in LINE_OUT -+ if column is bigger than width. */ -+ if (offset_out == 0) -+ { -+ line_out[offset_out++] = c; -+ continue; -+ } -+ - /* Look for the last blank. */ - while (logical_end) - { -@@ -215,11 +252,220 @@ fold_file (char const *filename, size_t width) - line_out[offset_out++] = c; - } - -- saved_errno = errno; -+ *saved_errno = errno; -+ -+ if (offset_out) -+ fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); -+ -+} -+ -+#if HAVE_MBRTOWC -+static void -+fold_multibyte_text (FILE *istream, size_t width, int *saved_errno) -+{ -+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ -+ size_t buflen = 0; /* The length of the byte sequence in buf. */ -+ char *bufpos = buf; /* Next read position of BUF. */ -+ wint_t wc; /* A gotten wide character. */ -+ size_t mblength; /* The byte size of a multibyte character which shows -+ as same character as WC. */ -+ mbstate_t state, state_bak; /* State of the stream. */ -+ int convfail = 0; /* 1, when conversion is failed. Otherwise 0. */ -+ -+ static char *line_out = NULL; -+ size_t offset_out = 0; /* Index in `line_out' for next char. */ -+ static size_t allocated_out = 0; -+ -+ int increment; -+ size_t column = 0; -+ -+ size_t last_blank_pos; -+ size_t last_blank_column; -+ int is_blank_seen; -+ int last_blank_increment = 0; -+ int is_bs_following_last_blank; -+ size_t bs_following_last_blank_num; -+ int is_cr_after_last_blank; -+ -+#define CLEAR_FLAGS \ -+ do \ -+ { \ -+ last_blank_pos = 0; \ -+ last_blank_column = 0; \ -+ is_blank_seen = 0; \ -+ is_bs_following_last_blank = 0; \ -+ bs_following_last_blank_num = 0; \ -+ is_cr_after_last_blank = 0; \ -+ } \ -+ while (0) -+ -+#define START_NEW_LINE \ -+ do \ -+ { \ -+ putchar ('\n'); \ -+ column = 0; \ -+ offset_out = 0; \ -+ CLEAR_FLAGS; \ -+ } \ -+ while (0) -+ -+ CLEAR_FLAGS; -+ memset (&state, '\0', sizeof(mbstate_t)); -+ -+ for (;; bufpos += mblength, buflen -= mblength) -+ { -+ if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream)) -+ { -+ memmove (buf, bufpos, buflen); -+ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream); -+ bufpos = buf; -+ } -+ -+ if (buflen < 1) -+ break; -+ -+ /* Get a wide character. */ -+ state_bak = state; -+ mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state); -+ -+ switch (mblength) -+ { -+ case (size_t)-1: -+ case (size_t)-2: -+ convfail++; -+ state = state_bak; -+ /* Fall through. */ -+ -+ case 0: -+ mblength = 1; -+ break; -+ } -+ -+rescan: -+ if (convfail) -+ increment = 1; -+ else if (wc == L'\n') -+ { -+ /* preserve newline */ -+ fwrite (line_out, sizeof(char), offset_out, stdout); -+ START_NEW_LINE; -+ continue; -+ } -+ else if (operating_mode == byte_mode) /* byte mode */ -+ increment = mblength; -+ else if (operating_mode == character_mode) /* character mode */ -+ increment = 1; -+ else /* column mode */ -+ { -+ switch (wc) -+ { -+ case L'\b': -+ increment = (column > 0) ? -1 : 0; -+ break; -+ -+ case L'\r': -+ increment = -1 * column; -+ break; -+ -+ case L'\t': -+ increment = 8 - column % 8; -+ break; -+ -+ default: -+ increment = wcwidth (wc); -+ increment = (increment < 0) ? 0 : increment; -+ } -+ } -+ -+ if (column + increment > width && break_spaces && last_blank_pos) -+ { -+ fwrite (line_out, sizeof(char), last_blank_pos, stdout); -+ putchar ('\n'); -+ -+ offset_out = offset_out - last_blank_pos; -+ column = column - last_blank_column + ((is_cr_after_last_blank) -+ ? last_blank_increment : bs_following_last_blank_num); -+ memmove (line_out, line_out + last_blank_pos, offset_out); -+ CLEAR_FLAGS; -+ goto rescan; -+ } -+ -+ if (column + increment > width && column != 0) -+ { -+ fwrite (line_out, sizeof(char), offset_out, stdout); -+ START_NEW_LINE; -+ goto rescan; -+ } -+ -+ if (allocated_out < offset_out + mblength) -+ { -+ line_out = X2REALLOC (line_out, &allocated_out); -+ } -+ -+ memcpy (line_out + offset_out, bufpos, mblength); -+ offset_out += mblength; -+ column += increment; -+ -+ if (is_blank_seen && !convfail && wc == L'\r') -+ is_cr_after_last_blank = 1; -+ -+ if (is_bs_following_last_blank && !convfail && wc == L'\b') -+ ++bs_following_last_blank_num; -+ else -+ is_bs_following_last_blank = 0; -+ -+ if (break_spaces && !convfail && iswblank (wc)) -+ { -+ last_blank_pos = offset_out; -+ last_blank_column = column; -+ is_blank_seen = 1; -+ last_blank_increment = increment; -+ is_bs_following_last_blank = 1; -+ bs_following_last_blank_num = 0; -+ is_cr_after_last_blank = 0; -+ } -+ } -+ -+ *saved_errno = errno; - - if (offset_out) - fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); - -+} -+#endif -+ -+/* Fold file FILENAME, or standard input if FILENAME is "-", -+ to stdout, with maximum line length WIDTH. -+ Return 0 if successful, 1 if an error occurs. */ -+ -+static bool -+fold_file (char const *filename, size_t width) -+{ -+ FILE *istream; -+ int saved_errno; -+ -+ if (STREQ (filename, "-")) -+ { -+ istream = stdin; -+ have_read_stdin = 1; -+ } -+ else -+ istream = fopen (filename, "r"); -+ -+ if (istream == NULL) -+ { -+ error (0, errno, "%s", filename); -+ return 1; -+ } -+ -+ /* Define how ISTREAM is being folded. */ -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ fold_multibyte_text (istream, width, &saved_errno); -+ else -+#endif -+ fold_text (istream, width, &saved_errno); -+ - if (ferror (istream)) - { - error (0, saved_errno, "%s", quotef (filename)); -@@ -252,7 +498,8 @@ main (int argc, char **argv) - - atexit (close_stdout); - -- break_spaces = count_bytes = have_read_stdin = false; -+ operating_mode = column_mode; -+ break_spaces = have_read_stdin = false; - - while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) - { -@@ -261,7 +508,15 @@ main (int argc, char **argv) - switch (optc) - { - case 'b': /* Count bytes rather than columns. */ -- count_bytes = true; -+ if (operating_mode != column_mode) -+ FATAL_ERROR (_("only one way of folding may be specified")); -+ operating_mode = byte_mode; -+ break; -+ -+ case 'c': -+ if (operating_mode != column_mode) -+ FATAL_ERROR (_("only one way of folding may be specified")); -+ operating_mode = character_mode; - break; - - case 's': /* Break at word boundaries. */ -diff --git a/src/join.c b/src/join.c -index 1accfd4..e27243f 100644 ---- a/src/join.c -+++ b/src/join.c -@@ -22,19 +22,33 @@ - #include - #include - -+/* Get mbstate_t, mbrtowc(), mbrtowc(), wcwidth(). */ -+#if HAVE_WCHAR_H -+# include -+#endif -+ -+/* Get iswblank(), towupper. */ -+#if HAVE_WCTYPE_H -+# include -+#endif -+ - #include "system.h" - #include "die.h" - #include "error.h" - #include "fadvise.h" - #include "hard-locale.h" - #include "linebuffer.h" --#include "memcasecmp.h" - #include "quote.h" - #include "stdio--.h" - #include "xmemcoll.h" - #include "xstrtol.h" - #include "argmatch.h" - -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif -+ - /* The official name of this program (e.g., no 'g' prefix). */ - #define PROGRAM_NAME "join" - -@@ -136,10 +150,12 @@ static struct outlist outlist_head; - /* Last element in 'outlist', where a new element can be added. */ - static struct outlist *outlist_end = &outlist_head; - --/* Tab character separating fields. If negative, fields are separated -- by any nonempty string of blanks, otherwise by exactly one -- tab character whose value (when cast to unsigned char) equals TAB. */ --static int tab = -1; -+/* Tab character separating fields. If NULL, fields are separated -+ by any nonempty string of blanks. */ -+static char *tab = NULL; -+ -+/* The number of bytes used for tab. */ -+static size_t tablen = 0; - - /* If nonzero, check that the input is correctly ordered. */ - static enum -@@ -276,13 +292,14 @@ xfields (struct line *line) - if (ptr == lim) - return; - -- if (0 <= tab && tab != '\n') -+ if (tab != NULL) - { -+ unsigned char t = tab[0]; - char *sep; -- for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1) -+ for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1) - extract_field (line, ptr, sep - ptr); - } -- else if (tab < 0) -+ else - { - /* Skip leading blanks before the first field. */ - while (field_sep (*ptr)) -@@ -306,6 +323,147 @@ xfields (struct line *line) - extract_field (line, ptr, lim - ptr); - } - -+#if HAVE_MBRTOWC -+static void -+xfields_multibyte (struct line *line) -+{ -+ char *ptr = line->buf.buffer; -+ char const *lim = ptr + line->buf.length - 1; -+ wchar_t wc = 0; -+ size_t mblength = 1; -+ mbstate_t state, state_bak; -+ -+ memset (&state, 0, sizeof (mbstate_t)); -+ -+ if (ptr >= lim) -+ return; -+ -+ if (tab != NULL) -+ { -+ char *sep = ptr; -+ for (; ptr < lim; ptr = sep + mblength) -+ { -+ sep = ptr; -+ while (sep < lim) -+ { -+ state_bak = state; -+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); -+ -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ if (mblength == tablen && !memcmp (sep, tab, mblength)) -+ break; -+ else -+ { -+ sep += mblength; -+ continue; -+ } -+ } -+ -+ if (sep >= lim) -+ break; -+ -+ extract_field (line, ptr, sep - ptr); -+ } -+ } -+ else -+ { -+ /* Skip leading blanks before the first field. */ -+ while(ptr < lim) -+ { -+ state_bak = state; -+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); -+ -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ break; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ if (!iswblank(wc) && wc != '\n') -+ break; -+ ptr += mblength; -+ } -+ -+ do -+ { -+ char *sep; -+ state_bak = state; -+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ break; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ sep = ptr + mblength; -+ while (sep < lim) -+ { -+ state_bak = state; -+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ break; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ if (iswblank (wc) || wc == '\n') -+ break; -+ -+ sep += mblength; -+ } -+ -+ extract_field (line, ptr, sep - ptr); -+ if (sep >= lim) -+ return; -+ -+ state_bak = state; -+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ break; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ ptr = sep + mblength; -+ while (ptr < lim) -+ { -+ state_bak = state; -+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ break; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ if (!iswblank (wc) && wc != '\n') -+ break; -+ -+ ptr += mblength; -+ } -+ } -+ while (ptr < lim); -+ } -+ -+ extract_field (line, ptr, lim - ptr); -+} -+#endif -+ - static void - freeline (struct line *line) - { -@@ -327,56 +485,133 @@ keycmp (struct line const *line1, struct line const *line2, - size_t jf_1, size_t jf_2) - { - /* Start of field to compare in each file. */ -- char *beg1; -- char *beg2; -- -- size_t len1; -- size_t len2; /* Length of fields to compare. */ -+ char *beg[2]; -+ char *copy[2]; -+ size_t len[2]; /* Length of fields to compare. */ - int diff; -+ int i, j; -+ int mallocd = 0; - - if (jf_1 < line1->nfields) - { -- beg1 = line1->fields[jf_1].beg; -- len1 = line1->fields[jf_1].len; -+ beg[0] = line1->fields[jf_1].beg; -+ len[0] = line1->fields[jf_1].len; - } - else - { -- beg1 = NULL; -- len1 = 0; -+ beg[0] = NULL; -+ len[0] = 0; - } - - if (jf_2 < line2->nfields) - { -- beg2 = line2->fields[jf_2].beg; -- len2 = line2->fields[jf_2].len; -+ beg[1] = line2->fields[jf_2].beg; -+ len[1] = line2->fields[jf_2].len; - } - else - { -- beg2 = NULL; -- len2 = 0; -+ beg[1] = NULL; -+ len[1] = 0; - } - -- if (len1 == 0) -- return len2 == 0 ? 0 : -1; -- if (len2 == 0) -+ if (len[0] == 0) -+ return len[1] == 0 ? 0 : -1; -+ if (len[1] == 0) - return 1; - - if (ignore_case) - { -- /* FIXME: ignore_case does not work with NLS (in particular, -- with multibyte chars). */ -- diff = memcasecmp (beg1, beg2, MIN (len1, len2)); -+#ifdef HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ size_t mblength; -+ wchar_t wc, uwc; -+ mbstate_t state, state_bak; -+ -+ memset (&state, '\0', sizeof (mbstate_t)); -+ -+ for (i = 0; i < 2; i++) -+ { -+ mallocd = 1; -+ copy[i] = xmalloc (len[i] + 1); -+ memset (copy[i], '\0',len[i] + 1); -+ -+ for (j = 0; j < MIN (len[0], len[1]);) -+ { -+ state_bak = state; -+ mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state); -+ -+ switch (mblength) -+ { -+ case (size_t) -1: -+ case (size_t) -2: -+ state = state_bak; -+ /* Fall through */ -+ case 0: -+ mblength = 1; -+ break; -+ -+ default: -+ uwc = towupper (wc); -+ -+ if (uwc != wc) -+ { -+ mbstate_t state_wc; -+ size_t mblen; -+ -+ memset (&state_wc, '\0', sizeof (mbstate_t)); -+ mblen = wcrtomb (copy[i] + j, uwc, &state_wc); -+ assert (mblen != (size_t)-1); -+ } -+ else -+ memcpy (copy[i] + j, beg[i] + j, mblength); -+ } -+ j += mblength; -+ } -+ copy[i][j] = '\0'; -+ } -+ } -+ else -+#endif -+ { -+ for (i = 0; i < 2; i++) -+ { -+ mallocd = 1; -+ copy[i] = xmalloc (len[i] + 1); -+ -+ for (j = 0; j < MIN (len[0], len[1]); j++) -+ copy[i][j] = toupper (beg[i][j]); -+ -+ copy[i][j] = '\0'; -+ } -+ } - } - else - { -- if (hard_LC_COLLATE) -- return xmemcoll (beg1, len1, beg2, len2); -- diff = memcmp (beg1, beg2, MIN (len1, len2)); -+ copy[0] = beg[0]; -+ copy[1] = beg[1]; - } - -+ if (hard_LC_COLLATE) -+ { -+ diff = xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]); -+ -+ if (mallocd) -+ for (i = 0; i < 2; i++) -+ free (copy[i]); -+ -+ return diff; -+ } -+ diff = memcmp (copy[0], copy[1], MIN (len[0], len[1])); -+ -+ if (mallocd) -+ for (i = 0; i < 2; i++) -+ free (copy[i]); -+ -+ - if (diff) - return diff; -- return len1 < len2 ? -1 : len1 != len2; -+ return len[0] - len[1]; - } - - /* Check that successive input lines PREV and CURRENT from input file -@@ -468,6 +703,11 @@ get_line (FILE *fp, struct line **linep, int which) - } - ++line_no[which - 1]; - -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ xfields_multibyte (line); -+ else -+#endif - xfields (line); - - if (prevline[which - 1]) -@@ -563,21 +803,28 @@ prfield (size_t n, struct line const *line) - - /* Output all the fields in line, other than the join field. */ - -+#define PUT_TAB_CHAR \ -+ do \ -+ { \ -+ (tab != NULL) ? \ -+ fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \ -+ } \ -+ while (0) -+ - static void - prfields (struct line const *line, size_t join_field, size_t autocount) - { - size_t i; - size_t nfields = autoformat ? autocount : line->nfields; -- char output_separator = tab < 0 ? ' ' : tab; - - for (i = 0; i < join_field && i < nfields; ++i) - { -- putchar (output_separator); -+ PUT_TAB_CHAR; - prfield (i, line); - } - for (i = join_field + 1; i < nfields; ++i) - { -- putchar (output_separator); -+ PUT_TAB_CHAR; - prfield (i, line); - } - } -@@ -588,7 +835,6 @@ static void - prjoin (struct line const *line1, struct line const *line2) - { - const struct outlist *outlist; -- char output_separator = tab < 0 ? ' ' : tab; - size_t field; - struct line const *line; - -@@ -622,7 +868,7 @@ prjoin (struct line const *line1, struct line const *line2) - o = o->next; - if (o == NULL) - break; -- putchar (output_separator); -+ PUT_TAB_CHAR; - } - putchar (eolchar); - } -@@ -1098,20 +1344,43 @@ main (int argc, char **argv) - - case 't': - { -- unsigned char newtab = optarg[0]; -+ char *newtab = NULL; -+ size_t newtablen; -+ newtab = xstrdup (optarg); -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ mbstate_t state; -+ -+ memset (&state, 0, sizeof (mbstate_t)); -+ newtablen = mbrtowc (NULL, newtab, -+ strnlen (newtab, MB_LEN_MAX), -+ &state); -+ if (newtablen == (size_t) 0 -+ || newtablen == (size_t) -1 -+ || newtablen == (size_t) -2) -+ newtablen = 1; -+ } -+ else -+#endif -+ newtablen = 1; - if (! newtab) -- newtab = '\n'; /* '' => process the whole line. */ -+ newtab = (char*)"\n"; /* '' => process the whole line. */ - else if (optarg[1]) - { -- if (STREQ (optarg, "\\0")) -- newtab = '\0'; -- else -- die (EXIT_FAILURE, 0, _("multi-character tab %s"), -- quote (optarg)); -+ if (newtablen == 1 && newtab[1]) -+ { -+ if (STREQ (newtab, "\\0")) -+ newtab[0] = '\0'; -+ } -+ } -+ if (tab != NULL && strcmp (tab, newtab)) -+ { -+ free (newtab); -+ die (EXIT_FAILURE, 0, _("incompatible tabs")); - } -- if (0 <= tab && tab != newtab) -- die (EXIT_FAILURE, 0, _("incompatible tabs")); - tab = newtab; -+ tablen = newtablen; - } - break; - -diff --git a/src/local.mk b/src/local.mk -index 72db9c7..ef3bfa4 100644 ---- a/src/local.mk -+++ b/src/local.mk -@@ -415,8 +415,8 @@ src_basenc_CPPFLAGS = -DBASE_TYPE=42 $(AM_CPPFLAGS) - - src_ginstall_CPPFLAGS = -DENABLE_MATCHPATHCON=1 $(AM_CPPFLAGS) - --src_expand_SOURCES = src/expand.c src/expand-common.c --src_unexpand_SOURCES = src/unexpand.c src/expand-common.c -+src_expand_SOURCES = src/expand.c src/expand-common.c lib/mbfile.c -+src_unexpand_SOURCES = src/unexpand.c src/expand-common.c lib/mbfile.c - - # Ensure we don't link against libcoreutils.a as that lib is - # not compiled with -fPIC which causes issues on 64 bit at least -diff --git a/src/pr.c b/src/pr.c -index 1606082..6374a7f 100644 ---- a/src/pr.c -+++ b/src/pr.c -@@ -311,6 +311,24 @@ - - #include - #include -+ -+/* Get MB_LEN_MAX. */ -+#include -+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC -+ installation; work around this configuration error. */ -+#if !defined MB_LEN_MAX || MB_LEN_MAX == 1 -+# define MB_LEN_MAX 16 -+#endif -+ -+/* Get MB_CUR_MAX. */ -+#include -+ -+/* Solaris 2.5 has a bug: must be included before . */ -+/* Get mbstate_t, mbrtowc(), wcwidth(). */ -+#if HAVE_WCHAR_H -+# include -+#endif -+ - #include "system.h" - #include "die.h" - #include "error.h" -@@ -325,6 +343,18 @@ - #include "xstrtol-error.h" - #include "xdectoint.h" - -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif -+ -+#ifndef HAVE_DECL_WCWIDTH -+"this configure-time declaration test was not run" -+#endif -+#if !HAVE_DECL_WCWIDTH -+extern int wcwidth (); -+#endif -+ - /* The official name of this program (e.g., no 'g' prefix). */ - #define PROGRAM_NAME "pr" - -@@ -417,7 +447,20 @@ struct COLUMN - - typedef struct COLUMN COLUMN; - --static int char_to_clump (char c); -+/* Funtion pointers to switch functions for single byte locale or for -+ multibyte locale. If multibyte functions do not exist in your sysytem, -+ these pointers always point the function for single byte locale. */ -+static void (*print_char) (char c); -+static int (*char_to_clump) (char c); -+ -+/* Functions for single byte locale. */ -+static void print_char_single (char c); -+static int char_to_clump_single (char c); -+ -+/* Functions for multibyte locale. */ -+static void print_char_multi (char c); -+static int char_to_clump_multi (char c); -+ - static bool read_line (COLUMN *p); - static bool print_page (void); - static bool print_stored (COLUMN *p); -@@ -429,6 +472,7 @@ static void add_line_number (COLUMN *p); - static void getoptnum (const char *n_str, int min, int *num, - const char *errfmt); - static void getoptarg (char *arg, char switch_char, char *character, -+ int *character_length, int *character_width, - int *number); - static void print_files (int number_of_files, char **av); - static void init_parameters (int number_of_files); -@@ -442,7 +486,6 @@ static void store_char (char c); - static void pad_down (unsigned int lines); - static void read_rest_of_line (COLUMN *p); - static void skip_read (COLUMN *p, int column_number); --static void print_char (char c); - static void cleanup (void); - static void print_sep_string (void); - static void separator_string (const char *optarg_S); -@@ -454,7 +497,7 @@ static COLUMN *column_vector; - we store the leftmost columns contiguously in buff. - To print a line from buff, get the index of the first character - from line_vector[i], and print up to line_vector[i + 1]. */ --static char *buff; -+static unsigned char *buff; - - /* Index of the position in buff where the next character - will be stored. */ -@@ -558,7 +601,7 @@ static int chars_per_column; - static bool untabify_input = false; - - /* (-e) The input tab character. */ --static char input_tab_char = '\t'; -+static char input_tab_char[MB_LEN_MAX] = "\t"; - - /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ... - where the leftmost column is 1. */ -@@ -568,7 +611,10 @@ static int chars_per_input_tab = 8; - static bool tabify_output = false; - - /* (-i) The output tab character. */ --static char output_tab_char = '\t'; -+static char output_tab_char[MB_LEN_MAX] = "\t"; -+ -+/* (-i) The byte length of output tab character. */ -+static int output_tab_char_length = 1; - - /* (-i) The width of the output tab. */ - static int chars_per_output_tab = 8; -@@ -638,7 +684,13 @@ static int line_number; - static bool numbered_lines = false; - - /* (-n) Character which follows each line number. */ --static char number_separator = '\t'; -+static char number_separator[MB_LEN_MAX] = "\t"; -+ -+/* (-n) The byte length of the character which follows each line number. */ -+static int number_separator_length = 1; -+ -+/* (-n) The character width of the character which follows each line number. */ -+static int number_separator_width = 0; - - /* (-n) line counting starts with 1st line of input file (not with 1st - line of 1st page printed). */ -@@ -691,6 +743,7 @@ static bool use_col_separator = false; - -a|COLUMN|-m is a 'space' and with the -J option a 'tab'. */ - static char const *col_sep_string = ""; - static int col_sep_length = 0; -+static int col_sep_width = 0; - static char *column_separator = (char *) " "; - static char *line_separator = (char *) "\t"; - -@@ -852,6 +905,13 @@ separator_string (const char *optarg_S) - integer_overflow (); - col_sep_length = len; - col_sep_string = optarg_S; -+ -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ col_sep_width = mbswidth (col_sep_string, 0); -+ else -+#endif -+ col_sep_width = col_sep_length; - } - - int -@@ -876,6 +936,21 @@ main (int argc, char **argv) - - atexit (close_stdout); - -+/* Define which functions are used, the ones for single byte locale or the ones -+ for multibyte locale. */ -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ print_char = print_char_multi; -+ char_to_clump = char_to_clump_multi; -+ } -+ else -+#endif -+ { -+ print_char = print_char_single; -+ char_to_clump = char_to_clump_single; -+ } -+ - n_files = 0; - file_names = (argc > 1 - ? xnmalloc (argc - 1, sizeof (char *)) -@@ -952,8 +1027,12 @@ main (int argc, char **argv) - break; - case 'e': - if (optarg) -- getoptarg (optarg, 'e', &input_tab_char, -- &chars_per_input_tab); -+ { -+ int dummy_length, dummy_width; -+ -+ getoptarg (optarg, 'e', input_tab_char, &dummy_length, -+ &dummy_width, &chars_per_input_tab); -+ } - /* Could check tab width > 0. */ - untabify_input = true; - break; -@@ -966,8 +1045,12 @@ main (int argc, char **argv) - break; - case 'i': - if (optarg) -- getoptarg (optarg, 'i', &output_tab_char, -- &chars_per_output_tab); -+ { -+ int dummy_width; -+ -+ getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length, -+ &dummy_width, &chars_per_output_tab); -+ } - /* Could check tab width > 0. */ - tabify_output = true; - break; -@@ -985,8 +1068,8 @@ main (int argc, char **argv) - case 'n': - numbered_lines = true; - if (optarg) -- getoptarg (optarg, 'n', &number_separator, -- &chars_per_number); -+ getoptarg (optarg, 'n', number_separator, &number_separator_length, -+ &number_separator_width, &chars_per_number); - break; - case 'N': - skip_count = false; -@@ -1011,6 +1094,7 @@ main (int argc, char **argv) - /* Reset an additional input of -s, -S dominates -s */ - col_sep_string = ""; - col_sep_length = 0; -+ col_sep_width = 0; - use_col_separator = true; - if (optarg) - separator_string (optarg); -@@ -1166,10 +1250,45 @@ getoptnum (const char *n_str, int min, int *num, const char *err) - a number. */ - - static void --getoptarg (char *arg, char switch_char, char *character, int *number) -+getoptarg (char *arg, char switch_char, char *character, int *character_length, -+ int *character_width, int *number) - { - if (!ISDIGIT (*arg)) -- *character = *arg++; -+ { -+#ifdef HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) /* for multibyte locale. */ -+ { -+ wchar_t wc; -+ size_t mblength; -+ int width; -+ mbstate_t state = {'\0'}; -+ -+ mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state); -+ -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ *character_length = 1; -+ *character_width = 1; -+ } -+ else -+ { -+ *character_length = (mblength < 1) ? 1 : mblength; -+ width = wcwidth (wc); -+ *character_width = (width < 0) ? 0 : width; -+ } -+ -+ strncpy (character, arg, *character_length); -+ arg += *character_length; -+ } -+ else /* for single byte locale. */ -+#endif -+ { -+ *character = *arg++; -+ *character_length = 1; -+ *character_width = 1; -+ } -+ } -+ - if (*arg) - { - long int tmp_long; -@@ -1191,6 +1310,11 @@ static void - init_parameters (int number_of_files) - { - int chars_used_by_number = 0; -+ int mb_len = 1; -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ mb_len = MB_LEN_MAX; -+#endif - - lines_per_body = lines_per_page - lines_per_header - lines_per_footer; - if (lines_per_body <= 0) -@@ -1228,7 +1352,7 @@ init_parameters (int number_of_files) - else - col_sep_string = column_separator; - -- col_sep_length = 1; -+ col_sep_length = col_sep_width = 1; - use_col_separator = true; - } - /* It's rather pointless to define a TAB separator with column -@@ -1258,11 +1382,11 @@ init_parameters (int number_of_files) - + TAB_WIDTH (chars_per_input_tab, chars_per_number); */ - - /* Estimate chars_per_text without any margin and keep it constant. */ -- if (number_separator == '\t') -+ if (number_separator[0] == '\t') - number_width = (chars_per_number - + TAB_WIDTH (chars_per_default_tab, chars_per_number)); - else -- number_width = chars_per_number + 1; -+ number_width = chars_per_number + number_separator_width; - - /* The number is part of the column width unless we are - printing files in parallel. */ -@@ -1271,7 +1395,7 @@ init_parameters (int number_of_files) - } - - int sep_chars, useful_chars; -- if (INT_MULTIPLY_WRAPV (columns - 1, col_sep_length, &sep_chars)) -+ if (INT_MULTIPLY_WRAPV (columns - 1, col_sep_width, &sep_chars)) - sep_chars = INT_MAX; - if (INT_SUBTRACT_WRAPV (chars_per_line - chars_used_by_number, sep_chars, - &useful_chars)) -@@ -1294,7 +1418,7 @@ init_parameters (int number_of_files) - We've to use 8 as the lower limit, if we use chars_per_default_tab = 8 - to expand a tab which is not an input_tab-char. */ - free (clump_buff); -- clump_buff = xmalloc (MAX (8, chars_per_input_tab)); -+ clump_buff = xmalloc (mb_len * MAX (8, chars_per_input_tab)); - } - - /* Open the necessary files, -@@ -1400,7 +1524,7 @@ init_funcs (void) - - /* Enlarge p->start_position of first column to use the same form of - padding_not_printed with all columns. */ -- h = h + col_sep_length; -+ h = h + col_sep_width; - - /* This loop takes care of all but the rightmost column. */ - -@@ -1434,7 +1558,7 @@ init_funcs (void) - } - else - { -- h = h_next + col_sep_length; -+ h = h_next + col_sep_width; - h_next = h + chars_per_column; - } - } -@@ -1725,9 +1849,9 @@ static void - align_column (COLUMN *p) - { - padding_not_printed = p->start_position; -- if (col_sep_length < padding_not_printed) -+ if (col_sep_width < padding_not_printed) - { -- pad_across_to (padding_not_printed - col_sep_length); -+ pad_across_to (padding_not_printed - col_sep_width); - padding_not_printed = ANYWHERE; - } - -@@ -2002,13 +2126,13 @@ store_char (char c) - /* May be too generous. */ - buff = X2REALLOC (buff, &buff_allocated); - } -- buff[buff_current++] = c; -+ buff[buff_current++] = (unsigned char) c; - } - - static void - add_line_number (COLUMN *p) - { -- int i; -+ int i, j; - char *s; - int num_width; - -@@ -2025,22 +2149,24 @@ add_line_number (COLUMN *p) - /* Tabification is assumed for multiple columns, also for n-separators, - but 'default n-separator = TAB' hasn't been given priority over - equal column_width also specified by POSIX. */ -- if (number_separator == '\t') -+ if (number_separator[0] == '\t') - { - i = number_width - chars_per_number; - while (i-- > 0) - (p->char_func) (' '); - } - else -- (p->char_func) (number_separator); -+ for (j = 0; j < number_separator_length; j++) -+ (p->char_func) (number_separator[j]); - } - else - /* To comply with POSIX, we avoid any expansion of default TAB - separator with a single column output. No column_width requirement - has to be considered. */ - { -- (p->char_func) (number_separator); -- if (number_separator == '\t') -+ for (j = 0; j < number_separator_length; j++) -+ (p->char_func) (number_separator[j]); -+ if (number_separator[0] == '\t') - output_position = POS_AFTER_TAB (chars_per_output_tab, - output_position); - } -@@ -2199,7 +2325,7 @@ print_white_space (void) - while (goal - h_old > 1 - && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal) - { -- putchar (output_tab_char); -+ fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout); - h_old = h_new; - } - while (++h_old <= goal) -@@ -2219,6 +2345,7 @@ print_sep_string (void) - { - char const *s = col_sep_string; - int l = col_sep_length; -+ int not_space_flag; - - if (separators_not_printed <= 0) - { -@@ -2230,6 +2357,7 @@ print_sep_string (void) - { - for (; separators_not_printed > 0; --separators_not_printed) - { -+ not_space_flag = 0; - while (l-- > 0) - { - /* 3 types of sep_strings: spaces only, spaces and chars, -@@ -2243,12 +2371,15 @@ print_sep_string (void) - } - else - { -+ not_space_flag = 1; - if (spaces_not_printed > 0) - print_white_space (); - putchar (*s++); -- ++output_position; - } - } -+ if (not_space_flag) -+ output_position += col_sep_width; -+ - /* sep_string ends with some spaces */ - if (spaces_not_printed > 0) - print_white_space (); -@@ -2276,7 +2407,7 @@ print_clump (COLUMN *p, int n, char *clump) - required number of tabs and spaces. */ - - static void --print_char (char c) -+print_char_single (char c) - { - if (tabify_output) - { -@@ -2300,6 +2431,74 @@ print_char (char c) - putchar (c); - } - -+#ifdef HAVE_MBRTOWC -+static void -+print_char_multi (char c) -+{ -+ static size_t mbc_pos = 0; -+ static char mbc[MB_LEN_MAX] = {'\0'}; -+ static mbstate_t state = {'\0'}; -+ mbstate_t state_bak; -+ wchar_t wc; -+ size_t mblength; -+ int width; -+ -+ if (tabify_output) -+ { -+ state_bak = state; -+ mbc[mbc_pos++] = c; -+ mblength = mbrtowc (&wc, mbc, mbc_pos, &state); -+ -+ while (mbc_pos > 0) -+ { -+ switch (mblength) -+ { -+ case (size_t)-2: -+ state = state_bak; -+ return; -+ -+ case (size_t)-1: -+ state = state_bak; -+ ++output_position; -+ putchar (mbc[0]); -+ memmove (mbc, mbc + 1, MB_CUR_MAX - 1); -+ --mbc_pos; -+ break; -+ -+ case 0: -+ mblength = 1; -+ -+ default: -+ if (wc == L' ') -+ { -+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); -+ --mbc_pos; -+ ++spaces_not_printed; -+ return; -+ } -+ else if (spaces_not_printed > 0) -+ print_white_space (); -+ -+ /* Nonprintables are assumed to have width 0, except L'\b'. */ -+ if ((width = wcwidth (wc)) < 1) -+ { -+ if (wc == L'\b') -+ --output_position; -+ } -+ else -+ output_position += width; -+ -+ fwrite (mbc, sizeof(char), mblength, stdout); -+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); -+ mbc_pos -= mblength; -+ } -+ } -+ return; -+ } -+ putchar (c); -+} -+#endif -+ - /* Skip to page PAGE before printing. - PAGE may be larger than total number of pages. */ - -@@ -2477,9 +2676,9 @@ read_line (COLUMN *p) - align_empty_cols = false; - } - -- if (col_sep_length < padding_not_printed) -+ if (col_sep_width < padding_not_printed) - { -- pad_across_to (padding_not_printed - col_sep_length); -+ pad_across_to (padding_not_printed - col_sep_width); - padding_not_printed = ANYWHERE; - } - -@@ -2548,7 +2747,7 @@ print_stored (COLUMN *p) - COLUMN *q; - - int line = p->current_line++; -- char *first = &buff[line_vector[line]]; -+ unsigned char *first = &buff[line_vector[line]]; - /* FIXME - UMR: Uninitialized memory read: - * This is occurring while in: -@@ -2560,7 +2759,7 @@ print_stored (COLUMN *p) - xmalloc [xmalloc.c:94] - init_store_cols [pr.c:1648] - */ -- char *last = &buff[line_vector[line + 1]]; -+ unsigned char *last = &buff[line_vector[line + 1]]; - - pad_vertically = true; - -@@ -2580,9 +2779,9 @@ print_stored (COLUMN *p) - } - } - -- if (col_sep_length < padding_not_printed) -+ if (col_sep_width < padding_not_printed) - { -- pad_across_to (padding_not_printed - col_sep_length); -+ pad_across_to (padding_not_printed - col_sep_width); - padding_not_printed = ANYWHERE; - } - -@@ -2595,8 +2794,8 @@ print_stored (COLUMN *p) - if (spaces_not_printed == 0) - { - output_position = p->start_position + end_vector[line]; -- if (p->start_position - col_sep_length == chars_per_margin) -- output_position -= col_sep_length; -+ if (p->start_position - col_sep_width == chars_per_margin) -+ output_position -= col_sep_width; - } - - return true; -@@ -2615,7 +2814,7 @@ print_stored (COLUMN *p) - number of characters is 1.) */ - - static int --char_to_clump (char c) -+char_to_clump_single (char c) - { - unsigned char uc = c; - char *s = clump_buff; -@@ -2625,10 +2824,10 @@ char_to_clump (char c) - int chars; - int chars_per_c = 8; - -- if (c == input_tab_char) -+ if (c == input_tab_char[0]) - chars_per_c = chars_per_input_tab; - -- if (c == input_tab_char || c == '\t') -+ if (c == input_tab_char[0] || c == '\t') - { - width = TAB_WIDTH (chars_per_c, input_position); - -@@ -2709,6 +2908,164 @@ char_to_clump (char c) - return chars; - } - -+#ifdef HAVE_MBRTOWC -+static int -+char_to_clump_multi (char c) -+{ -+ static size_t mbc_pos = 0; -+ static char mbc[MB_LEN_MAX] = {'\0'}; -+ static mbstate_t state = {'\0'}; -+ mbstate_t state_bak; -+ wchar_t wc; -+ size_t mblength; -+ int wc_width; -+ register char *s = clump_buff; -+ register int i, j; -+ char esc_buff[4]; -+ int width; -+ int chars; -+ int chars_per_c = 8; -+ -+ state_bak = state; -+ mbc[mbc_pos++] = c; -+ mblength = mbrtowc (&wc, mbc, mbc_pos, &state); -+ -+ width = 0; -+ chars = 0; -+ while (mbc_pos > 0) -+ { -+ switch (mblength) -+ { -+ case (size_t)-2: -+ state = state_bak; -+ return 0; -+ -+ case (size_t)-1: -+ state = state_bak; -+ mblength = 1; -+ -+ if (use_esc_sequence || use_cntrl_prefix) -+ { -+ width = +4; -+ chars = +4; -+ *s++ = '\\'; -+ sprintf (esc_buff, "%03o", (unsigned char) mbc[0]); -+ for (i = 0; i <= 2; ++i) -+ *s++ = (int) esc_buff[i]; -+ } -+ else -+ { -+ width += 1; -+ chars += 1; -+ *s++ = mbc[0]; -+ } -+ break; -+ -+ case 0: -+ mblength = 1; -+ /* Fall through */ -+ -+ default: -+ if (memcmp (mbc, input_tab_char, mblength) == 0) -+ chars_per_c = chars_per_input_tab; -+ -+ if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t') -+ { -+ int width_inc; -+ -+ width_inc = TAB_WIDTH (chars_per_c, input_position); -+ width += width_inc; -+ -+ if (untabify_input) -+ { -+ for (i = width_inc; i; --i) -+ *s++ = ' '; -+ chars += width_inc; -+ } -+ else -+ { -+ for (i = 0; i < mblength; i++) -+ *s++ = mbc[i]; -+ chars += mblength; -+ } -+ } -+ else if ((wc_width = wcwidth (wc)) < 1) -+ { -+ if (use_esc_sequence) -+ { -+ for (i = 0; i < mblength; i++) -+ { -+ width += 4; -+ chars += 4; -+ *s++ = '\\'; -+ sprintf (esc_buff, "%03o", (unsigned char) mbc[i]); -+ for (j = 0; j <= 2; ++j) -+ *s++ = (int) esc_buff[j]; -+ } -+ } -+ else if (use_cntrl_prefix) -+ { -+ if (wc < 0200) -+ { -+ width += 2; -+ chars += 2; -+ *s++ = '^'; -+ *s++ = wc ^ 0100; -+ } -+ else -+ { -+ for (i = 0; i < mblength; i++) -+ { -+ width += 4; -+ chars += 4; -+ *s++ = '\\'; -+ sprintf (esc_buff, "%03o", (unsigned char) mbc[i]); -+ for (j = 0; j <= 2; ++j) -+ *s++ = (int) esc_buff[j]; -+ } -+ } -+ } -+ else if (wc == L'\b') -+ { -+ width += -1; -+ chars += 1; -+ *s++ = c; -+ } -+ else -+ { -+ width += 0; -+ chars += mblength; -+ for (i = 0; i < mblength; i++) -+ *s++ = mbc[i]; -+ } -+ } -+ else -+ { -+ width += wc_width; -+ chars += mblength; -+ for (i = 0; i < mblength; i++) -+ *s++ = mbc[i]; -+ } -+ } -+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); -+ mbc_pos -= mblength; -+ } -+ -+ /* Too many backspaces must put us in position 0 -- never negative. */ -+ if (width < 0 && input_position == 0) -+ { -+ chars = 0; -+ input_position = 0; -+ } -+ else if (width < 0 && input_position <= -width) -+ input_position = 0; -+ else -+ input_position += width; -+ -+ return chars; -+} -+#endif -+ - /* We've just printed some files and need to clean up things before - looking for more options and printing the next batch of files. - -diff --git a/src/sort.c b/src/sort.c -index 329ed45..8e1533e 100644 ---- a/src/sort.c -+++ b/src/sort.c -@@ -29,6 +29,14 @@ - #include - #include - #include -+#if HAVE_WCHAR_H -+# include -+#endif -+/* Get isw* functions. */ -+#if HAVE_WCTYPE_H -+# include -+#endif -+ - #include "system.h" - #include "argmatch.h" - #include "die.h" -@@ -157,14 +165,39 @@ static int decimal_point; - /* Thousands separator; if -1, then there isn't one. */ - static int thousands_sep; - -+/* True if -f is specified. */ -+static bool folding; -+ - /* Nonzero if the corresponding locales are hard. */ - static bool hard_LC_COLLATE; --#if HAVE_NL_LANGINFO -+#if HAVE_LANGINFO_CODESET - static bool hard_LC_TIME; - #endif - - #define NONZERO(x) ((x) != 0) - -+/* get a multibyte character's byte length. */ -+#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \ -+ do \ -+ { \ -+ wchar_t wc; \ -+ mbstate_t state_bak; \ -+ \ -+ state_bak = STATE; \ -+ mblength = mbrtowc (&wc, PTR, LIM - PTR, &STATE); \ -+ \ -+ switch (MBLENGTH) \ -+ { \ -+ case (size_t)-1: \ -+ case (size_t)-2: \ -+ STATE = state_bak; \ -+ /* Fall through. */ \ -+ case 0: \ -+ MBLENGTH = 1; \ -+ } \ -+ } \ -+ while (0) -+ - /* The kind of blanks for '-b' to skip in various options. */ - enum blanktype { bl_start, bl_end, bl_both }; - -@@ -338,13 +371,11 @@ static bool reverse; - they were read if all keys compare equal. */ - static bool stable; - --/* If TAB has this value, blanks separate fields. */ --enum { TAB_DEFAULT = CHAR_MAX + 1 }; -- --/* Tab character separating fields. If TAB_DEFAULT, then fields are -+/* Tab character separating fields. If tab_length is 0, then fields are - separated by the empty string between a non-blank character and a blank - character. */ --static int tab = TAB_DEFAULT; -+static char tab[MB_LEN_MAX + 1]; -+static size_t tab_length = 0; - - /* Flag to remove consecutive duplicate lines from the output. - Only the last of a sequence of equal lines will be output. */ -@@ -802,6 +833,46 @@ reap_all (void) - reap (-1); - } - -+/* Function pointers. */ -+static void -+(*inittables) (void); -+static char * -+(*begfield) (const struct line*, const struct keyfield *); -+static char * -+(*limfield) (const struct line*, const struct keyfield *); -+static void -+(*skipblanks) (char **ptr, char *lim); -+static int -+(*getmonth) (char const *, size_t, char **); -+static int -+(*keycompare) (const struct line *, const struct line *); -+static int -+(*numcompare) (const char *, const char *); -+ -+/* Test for white space multibyte character. -+ Set LENGTH the byte length of investigated multibyte character. */ -+#if HAVE_MBRTOWC -+static int -+ismbblank (const char *str, size_t len, size_t *length) -+{ -+ size_t mblength; -+ wchar_t wc; -+ mbstate_t state; -+ -+ memset (&state, '\0', sizeof(mbstate_t)); -+ mblength = mbrtowc (&wc, str, len, &state); -+ -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ *length = 1; -+ return 0; -+ } -+ -+ *length = (mblength < 1) ? 1 : mblength; -+ return iswblank (wc) || wc == '\n'; -+} -+#endif -+ - /* Clean up any remaining temporary files. */ - - static void -@@ -1270,7 +1341,7 @@ zaptemp (char const *name) - free (node); - } - --#if HAVE_NL_LANGINFO -+#if HAVE_LANGINFO_CODESET - - static int - struct_month_cmp (void const *m1, void const *m2) -@@ -1285,7 +1356,7 @@ struct_month_cmp (void const *m1, void const *m2) - /* Initialize the character class tables. */ - - static void --inittables (void) -+inittables_uni (void) - { - size_t i; - -@@ -1297,7 +1368,7 @@ inittables (void) - fold_toupper[i] = toupper (i); - } - --#if HAVE_NL_LANGINFO -+#if HAVE_LANGINFO_CODESET - /* If we're not in the "C" locale, read different names for months. */ - if (hard_LC_TIME) - { -@@ -1379,6 +1450,84 @@ specify_nmerge (int oi, char c, char const *s) - xstrtol_fatal (e, oi, c, long_options, s); - } - -+#if HAVE_MBRTOWC -+static void -+inittables_mb (void) -+{ -+ int i, j, k, l; -+ char *name, *s, *lc_time, *lc_ctype; -+ size_t s_len, mblength; -+ char mbc[MB_LEN_MAX]; -+ wchar_t wc, pwc; -+ mbstate_t state_mb, state_wc; -+ -+ lc_time = setlocale (LC_TIME, ""); -+ if (lc_time) -+ lc_time = xstrdup (lc_time); -+ -+ lc_ctype = setlocale (LC_CTYPE, ""); -+ if (lc_ctype) -+ lc_ctype = xstrdup (lc_ctype); -+ -+ if (lc_time && lc_ctype) -+ /* temporarily set LC_CTYPE to match LC_TIME, so that we can convert -+ * the names of months to upper case */ -+ setlocale (LC_CTYPE, lc_time); -+ -+ for (i = 0; i < MONTHS_PER_YEAR; i++) -+ { -+ s = (char *) nl_langinfo (ABMON_1 + i); -+ s_len = strlen (s); -+ monthtab[i].name = name = (char *) xmalloc (s_len + 1); -+ monthtab[i].val = i + 1; -+ -+ memset (&state_mb, '\0', sizeof (mbstate_t)); -+ memset (&state_wc, '\0', sizeof (mbstate_t)); -+ -+ for (j = 0; j < s_len;) -+ { -+ if (!ismbblank (s + j, s_len - j, &mblength)) -+ break; -+ j += mblength; -+ } -+ -+ for (k = 0; j < s_len;) -+ { -+ mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb); -+ assert (mblength != (size_t)-1 && mblength != (size_t)-2); -+ if (mblength == 0) -+ break; -+ -+ pwc = towupper (wc); -+ if (pwc == wc) -+ { -+ memcpy (mbc, s + j, mblength); -+ j += mblength; -+ } -+ else -+ { -+ j += mblength; -+ mblength = wcrtomb (mbc, pwc, &state_wc); -+ assert (mblength != (size_t)0 && mblength != (size_t)-1); -+ } -+ -+ for (l = 0; l < mblength; l++) -+ name[k++] = mbc[l]; -+ } -+ name[k] = '\0'; -+ } -+ qsort ((void *) monthtab, MONTHS_PER_YEAR, -+ sizeof (struct month), struct_month_cmp); -+ -+ if (lc_time && lc_ctype) -+ /* restore the original locales */ -+ setlocale (LC_CTYPE, lc_ctype); -+ -+ free (lc_ctype); -+ free (lc_time); -+} -+#endif -+ - /* Specify the amount of main memory to use when sorting. */ - static void - specify_sort_size (int oi, char c, char const *s) -@@ -1610,7 +1759,7 @@ buffer_linelim (struct buffer const *buf) - by KEY in LINE. */ - - static char * --begfield (struct line const *line, struct keyfield const *key) -+begfield_uni (const struct line *line, const struct keyfield *key) - { - char *ptr = line->text, *lim = ptr + line->length - 1; - size_t sword = key->sword; -@@ -1619,10 +1768,10 @@ begfield (struct line const *line, struct keyfield const *key) - /* The leading field separator itself is included in a field when -t - is absent. */ - -- if (tab != TAB_DEFAULT) -+ if (tab_length) - while (ptr < lim && sword--) - { -- while (ptr < lim && *ptr != tab) -+ while (ptr < lim && *ptr != tab[0]) - ++ptr; - if (ptr < lim) - ++ptr; -@@ -1648,11 +1797,70 @@ begfield (struct line const *line, struct keyfield const *key) - return ptr; - } - -+#if HAVE_MBRTOWC -+static char * -+begfield_mb (const struct line *line, const struct keyfield *key) -+{ -+ int i; -+ char *ptr = line->text, *lim = ptr + line->length - 1; -+ size_t sword = key->sword; -+ size_t schar = key->schar; -+ size_t mblength; -+ mbstate_t state; -+ -+ memset (&state, '\0', sizeof(mbstate_t)); -+ -+ if (tab_length) -+ while (ptr < lim && sword--) -+ { -+ while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) -+ { -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); -+ ptr += mblength; -+ } -+ if (ptr < lim) -+ { -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); -+ ptr += mblength; -+ } -+ } -+ else -+ while (ptr < lim && sword--) -+ { -+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) -+ ptr += mblength; -+ if (ptr < lim) -+ { -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); -+ ptr += mblength; -+ } -+ while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength)) -+ ptr += mblength; -+ } -+ -+ if (key->skipsblanks) -+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) -+ ptr += mblength; -+ -+ for (i = 0; i < schar; i++) -+ { -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); -+ -+ if (ptr + mblength > lim) -+ break; -+ else -+ ptr += mblength; -+ } -+ -+ return ptr; -+} -+#endif -+ - /* Return the limit of (a pointer to the first character after) the field - in LINE specified by KEY. */ - - static char * --limfield (struct line const *line, struct keyfield const *key) -+limfield_uni (const struct line *line, const struct keyfield *key) - { - char *ptr = line->text, *lim = ptr + line->length - 1; - size_t eword = key->eword, echar = key->echar; -@@ -1667,10 +1875,10 @@ limfield (struct line const *line, struct keyfield const *key) - 'beginning' is the first character following the delimiting TAB. - Otherwise, leave PTR pointing at the first 'blank' character after - the preceding field. */ -- if (tab != TAB_DEFAULT) -+ if (tab_length) - while (ptr < lim && eword--) - { -- while (ptr < lim && *ptr != tab) -+ while (ptr < lim && *ptr != tab[0]) - ++ptr; - if (ptr < lim && (eword || echar)) - ++ptr; -@@ -1716,10 +1924,10 @@ limfield (struct line const *line, struct keyfield const *key) - */ - - /* Make LIM point to the end of (one byte past) the current field. */ -- if (tab != TAB_DEFAULT) -+ if (tab_length) - { - char *newlim; -- newlim = memchr (ptr, tab, lim - ptr); -+ newlim = memchr (ptr, tab[0], lim - ptr); - if (newlim) - lim = newlim; - } -@@ -1750,6 +1958,130 @@ limfield (struct line const *line, struct keyfield const *key) - return ptr; - } - -+#if HAVE_MBRTOWC -+static char * -+limfield_mb (const struct line *line, const struct keyfield *key) -+{ -+ char *ptr = line->text, *lim = ptr + line->length - 1; -+ size_t eword = key->eword, echar = key->echar; -+ int i; -+ size_t mblength; -+ mbstate_t state; -+ -+ if (echar == 0) -+ eword++; /* skip all of end field. */ -+ -+ memset (&state, '\0', sizeof(mbstate_t)); -+ -+ if (tab_length) -+ while (ptr < lim && eword--) -+ { -+ while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) -+ { -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); -+ ptr += mblength; -+ } -+ if (ptr < lim && (eword | echar)) -+ { -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); -+ ptr += mblength; -+ } -+ } -+ else -+ while (ptr < lim && eword--) -+ { -+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) -+ ptr += mblength; -+ if (ptr < lim) -+ { -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); -+ ptr += mblength; -+ } -+ while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength)) -+ ptr += mblength; -+ } -+ -+ -+# ifdef POSIX_UNSPECIFIED -+ /* Make LIM point to the end of (one byte past) the current field. */ -+ if (tab_length) -+ { -+ char *newlim, *p; -+ -+ newlim = NULL; -+ for (p = ptr; p < lim;) -+ { -+ if (memcmp (p, tab, tab_length) == 0) -+ { -+ newlim = p; -+ break; -+ } -+ -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); -+ p += mblength; -+ } -+ } -+ else -+ { -+ char *newlim; -+ newlim = ptr; -+ -+ while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength)) -+ newlim += mblength; -+ if (ptr < lim) -+ { -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); -+ ptr += mblength; -+ } -+ while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength)) -+ newlim += mblength; -+ lim = newlim; -+ } -+# endif -+ -+ if (echar != 0) -+ { -+ /* If we're skipping leading blanks, don't start counting characters -+ * until after skipping past any leading blanks. */ -+ if (key->skipeblanks) -+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) -+ ptr += mblength; -+ -+ memset (&state, '\0', sizeof(mbstate_t)); -+ -+ /* Advance PTR by ECHAR (if possible), but no further than LIM. */ -+ for (i = 0; i < echar; i++) -+ { -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); -+ -+ if (ptr + mblength > lim) -+ break; -+ else -+ ptr += mblength; -+ } -+ } -+ -+ return ptr; -+} -+#endif -+ -+static void -+skipblanks_uni (char **ptr, char *lim) -+{ -+ while (*ptr < lim && blanks[to_uchar (**ptr)]) -+ ++(*ptr); -+} -+ -+#if HAVE_MBRTOWC -+static void -+skipblanks_mb (char **ptr, char *lim) -+{ -+ size_t mblength; -+ while (*ptr < lim && ismbblank (*ptr, lim - *ptr, &mblength)) -+ (*ptr) += mblength; -+} -+#endif -+ - /* Fill BUF reading from FP, moving buf->left bytes from the end - of buf->buf to the beginning first. If EOF is reached and the - file wasn't terminated by a newline, supply one. Set up BUF's line -@@ -1836,8 +2168,22 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file) - else - { - if (key->skipsblanks) -- while (blanks[to_uchar (*line_start)]) -- line_start++; -+ { -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ size_t mblength; -+ while (line_start < line->keylim && -+ ismbblank (line_start, -+ line->keylim - line_start, -+ &mblength)) -+ line_start += mblength; -+ } -+ else -+#endif -+ while (blanks[to_uchar (*line_start)]) -+ line_start++; -+ } - line->keybeg = line_start; - } - } -@@ -1971,12 +2317,10 @@ find_unit_order (char const *number) - < K/k < M < G < T < P < E < Z < Y */ - - static int --human_numcompare (char const *a, char const *b) -+human_numcompare (char *a, char *b) - { -- while (blanks[to_uchar (*a)]) -- a++; -- while (blanks[to_uchar (*b)]) -- b++; -+ skipblanks(&a, a + strlen(a)); -+ skipblanks(&b, b + strlen(b)); - - int diff = find_unit_order (a) - find_unit_order (b); - return (diff ? diff : strnumcmp (a, b, decimal_point, thousands_sep)); -@@ -1987,7 +2331,7 @@ human_numcompare (char const *a, char const *b) - hideously fast. */ - - static int --numcompare (char const *a, char const *b) -+numcompare_uni (const char *a, const char *b) - { - while (blanks[to_uchar (*a)]) - a++; -@@ -1997,6 +2341,25 @@ numcompare (char const *a, char const *b) - return strnumcmp (a, b, decimal_point, thousands_sep); - } - -+#if HAVE_MBRTOWC -+static int -+numcompare_mb (const char *a, const char *b) -+{ -+ size_t mblength, len; -+ len = strlen (a); /* okay for UTF-8 */ -+ while (*a && ismbblank (a, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength)) -+ { -+ a += mblength; -+ len -= mblength; -+ } -+ len = strlen (b); /* okay for UTF-8 */ -+ while (*b && ismbblank (b, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength)) -+ b += mblength; -+ -+ return strnumcmp (a, b, decimal_point, thousands_sep); -+} -+#endif /* HAV_EMBRTOWC */ -+ - /* Work around a problem whereby the long double value returned by glibc's - strtold ("NaN", ...) contains uninitialized bits: clear all bytes of - A and B before calling strtold. FIXME: remove this function if -@@ -2047,7 +2410,7 @@ general_numcompare (char const *sa, char const *sb) - Return 0 if the name in S is not recognized. */ - - static int --getmonth (char const *month, char **ea) -+getmonth_uni (char const *month, size_t len, char **ea) - { - size_t lo = 0; - size_t hi = MONTHS_PER_YEAR; -@@ -2323,15 +2686,14 @@ debug_key (struct line const *line, struct keyfield const *key) - char saved = *lim; - *lim = '\0'; - -- while (blanks[to_uchar (*beg)]) -- beg++; -+ skipblanks (&beg, lim); - - char *tighter_lim = beg; - - if (lim < beg) - tighter_lim = lim; - else if (key->month) -- getmonth (beg, &tighter_lim); -+ getmonth (beg, lim-beg, &tighter_lim); - else if (key->general_numeric) - ignore_value (strtold (beg, &tighter_lim)); - else if (key->numeric || key->human_numeric) -@@ -2465,7 +2827,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) - /* Warn about significant leading blanks. */ - bool implicit_skip = key_numeric (key) || key->month; - bool line_offset = key->eword == 0 && key->echar != 0; /* -k1.x,1.y */ -- if (!zero_width && !gkey_only && tab == TAB_DEFAULT && !line_offset -+ if (!zero_width && !gkey_only && !tab_length && !line_offset - && ((!key->skipsblanks && !implicit_skip) - || (!key->skipsblanks && key->schar) - || (!key->skipeblanks && key->echar))) -@@ -2523,11 +2885,87 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) - error (0, 0, _("option '-r' only applies to last-resort comparison")); - } - -+#if HAVE_MBRTOWC -+static int -+getmonth_mb (const char *s, size_t len, char **ea) -+{ -+ char *month; -+ register size_t i; -+ register int lo = 0, hi = MONTHS_PER_YEAR, result; -+ char *tmp; -+ size_t wclength, mblength; -+ const char *pp; -+ const wchar_t *wpp; -+ wchar_t *month_wcs; -+ mbstate_t state; -+ -+ while (len > 0 && ismbblank (s, len, &mblength)) -+ { -+ s += mblength; -+ len -= mblength; -+ } -+ -+ if (len == 0) -+ return 0; -+ -+ if (SIZE_MAX - len < 1) -+ xalloc_die (); -+ -+ month = (char *) xnmalloc (len + 1, MB_CUR_MAX); -+ -+ pp = tmp = (char *) xnmalloc (len + 1, MB_CUR_MAX); -+ memcpy (tmp, s, len); -+ tmp[len] = '\0'; -+ wpp = month_wcs = (wchar_t *) xnmalloc (len + 1, sizeof (wchar_t)); -+ memset (&state, '\0', sizeof (mbstate_t)); -+ -+ wclength = mbsrtowcs (month_wcs, &pp, len + 1, &state); -+ if (wclength == (size_t)-1 || pp != NULL) -+ error (SORT_FAILURE, 0, _("Invalid multibyte input %s."), quote(s)); -+ -+ for (i = 0; i < wclength; i++) -+ { -+ month_wcs[i] = towupper(month_wcs[i]); -+ if (iswblank (month_wcs[i])) -+ { -+ month_wcs[i] = L'\0'; -+ break; -+ } -+ } -+ -+ mblength = wcsrtombs (month, &wpp, (len + 1) * MB_CUR_MAX, &state); -+ assert (mblength != (-1) && wpp == NULL); -+ -+ do -+ { -+ int ix = (lo + hi) / 2; -+ -+ if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0) -+ hi = ix; -+ else -+ lo = ix; -+ } -+ while (hi - lo > 1); -+ -+ result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name)) -+ ? monthtab[lo].val : 0); -+ -+ if (ea && result) -+ *ea = (char*) s + strlen (monthtab[lo].name); -+ -+ free (month); -+ free (tmp); -+ free (month_wcs); -+ -+ return result; -+} -+#endif -+ - /* Compare two lines A and B trying every key in sequence until there - are no more keys or a difference is found. */ - - static int --keycompare (struct line const *a, struct line const *b) -+keycompare_uni (const struct line *a, const struct line *b) - { - struct keyfield *key = keylist; - -@@ -2612,7 +3050,7 @@ keycompare (struct line const *a, struct line const *b) - else if (key->human_numeric) - diff = human_numcompare (ta, tb); - else if (key->month) -- diff = getmonth (ta, NULL) - getmonth (tb, NULL); -+ diff = getmonth (ta, tlena, NULL) - getmonth (tb, tlenb, NULL); - else if (key->random) - diff = compare_random (ta, tlena, tb, tlenb); - else if (key->version) -@@ -2728,6 +3166,211 @@ keycompare (struct line const *a, struct line const *b) - return key->reverse ? -diff : diff; - } - -+#if HAVE_MBRTOWC -+static int -+keycompare_mb (const struct line *a, const struct line *b) -+{ -+ struct keyfield *key = keylist; -+ -+ /* For the first iteration only, the key positions have been -+ precomputed for us. */ -+ char *texta = a->keybeg; -+ char *textb = b->keybeg; -+ char *lima = a->keylim; -+ char *limb = b->keylim; -+ -+ size_t mblength_a, mblength_b; -+ wchar_t wc_a, wc_b; -+ mbstate_t state_a, state_b; -+ -+ int diff = 0; -+ -+ memset (&state_a, '\0', sizeof(mbstate_t)); -+ memset (&state_b, '\0', sizeof(mbstate_t)); -+ /* Ignore keys with start after end. */ -+ if (a->keybeg - a->keylim > 0) -+ return 0; -+ -+ -+ /* Ignore and/or translate chars before comparing. */ -+# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \ -+ do \ -+ { \ -+ wchar_t uwc; \ -+ char mbc[MB_LEN_MAX]; \ -+ mbstate_t state_wc; \ -+ \ -+ for (NEW_LEN = i = 0; i < LEN;) \ -+ { \ -+ mbstate_t state_bak; \ -+ \ -+ state_bak = STATE; \ -+ MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \ -+ \ -+ if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \ -+ || MBLENGTH == 0) \ -+ { \ -+ if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \ -+ STATE = state_bak; \ -+ if (!ignore) \ -+ COPY[NEW_LEN++] = TEXT[i]; \ -+ i++; \ -+ continue; \ -+ } \ -+ \ -+ if (ignore) \ -+ { \ -+ if ((ignore == nonprinting && !iswprint (WC)) \ -+ || (ignore == nondictionary \ -+ && !iswalnum (WC) && !iswblank (WC))) \ -+ { \ -+ i += MBLENGTH; \ -+ continue; \ -+ } \ -+ } \ -+ \ -+ if (translate) \ -+ { \ -+ \ -+ uwc = towupper(WC); \ -+ if (WC == uwc) \ -+ { \ -+ memcpy (mbc, TEXT + i, MBLENGTH); \ -+ i += MBLENGTH; \ -+ } \ -+ else \ -+ { \ -+ i += MBLENGTH; \ -+ WC = uwc; \ -+ memset (&state_wc, '\0', sizeof (mbstate_t)); \ -+ \ -+ MBLENGTH = wcrtomb (mbc, WC, &state_wc); \ -+ assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \ -+ } \ -+ \ -+ for (j = 0; j < MBLENGTH; j++) \ -+ COPY[NEW_LEN++] = mbc[j]; \ -+ } \ -+ else \ -+ for (j = 0; j < MBLENGTH; j++) \ -+ COPY[NEW_LEN++] = TEXT[i++]; \ -+ } \ -+ COPY[NEW_LEN] = '\0'; \ -+ } \ -+ while (0) -+ -+ /* Actually compare the fields. */ -+ -+ for (;;) -+ { -+ /* Find the lengths. */ -+ size_t lena = lima <= texta ? 0 : lima - texta; -+ size_t lenb = limb <= textb ? 0 : limb - textb; -+ -+ char enda IF_LINT (= 0); -+ char endb IF_LINT (= 0); -+ -+ char const *translate = key->translate; -+ bool const *ignore = key->ignore; -+ -+ if (ignore || translate) -+ { -+ if (SIZE_MAX - lenb - 2 < lena) -+ xalloc_die (); -+ char *copy_a = (char *) xnmalloc (lena + lenb + 2, MB_CUR_MAX); -+ char *copy_b = copy_a + lena * MB_CUR_MAX + 1; -+ size_t new_len_a, new_len_b; -+ size_t i, j; -+ -+ IGNORE_CHARS (new_len_a, lena, texta, copy_a, -+ wc_a, mblength_a, state_a); -+ IGNORE_CHARS (new_len_b, lenb, textb, copy_b, -+ wc_b, mblength_b, state_b); -+ texta = copy_a; textb = copy_b; -+ lena = new_len_a; lenb = new_len_b; -+ } -+ else -+ { -+ /* Use the keys in-place, temporarily null-terminated. */ -+ enda = texta[lena]; texta[lena] = '\0'; -+ endb = textb[lenb]; textb[lenb] = '\0'; -+ } -+ -+ if (key->random) -+ diff = compare_random (texta, lena, textb, lenb); -+ else if (key->numeric | key->general_numeric | key->human_numeric) -+ { -+ char savea = *lima, saveb = *limb; -+ -+ *lima = *limb = '\0'; -+ diff = (key->numeric ? numcompare (texta, textb) -+ : key->general_numeric ? general_numcompare (texta, textb) -+ : human_numcompare (texta, textb)); -+ *lima = savea, *limb = saveb; -+ } -+ else if (key->version) -+ diff = filevercmp (texta, textb); -+ else if (key->month) -+ diff = getmonth (texta, lena, NULL) - getmonth (textb, lenb, NULL); -+ else if (lena == 0) -+ diff = - NONZERO (lenb); -+ else if (lenb == 0) -+ diff = 1; -+ else if (hard_LC_COLLATE && !folding) -+ { -+ diff = xmemcoll0 (texta, lena + 1, textb, lenb + 1); -+ } -+ else -+ { -+ diff = memcmp (texta, textb, MIN (lena, lenb)); -+ if (diff == 0) -+ diff = lena < lenb ? -1 : lena != lenb; -+ } -+ -+ if (ignore || translate) -+ free (texta); -+ else -+ { -+ texta[lena] = enda; -+ textb[lenb] = endb; -+ } -+ -+ if (diff) -+ goto not_equal; -+ -+ key = key->next; -+ if (! key) -+ break; -+ -+ /* Find the beginning and limit of the next field. */ -+ if (key->eword != -1) -+ lima = limfield (a, key), limb = limfield (b, key); -+ else -+ lima = a->text + a->length - 1, limb = b->text + b->length - 1; -+ -+ if (key->sword != -1) -+ texta = begfield (a, key), textb = begfield (b, key); -+ else -+ { -+ texta = a->text, textb = b->text; -+ if (key->skipsblanks) -+ { -+ while (texta < lima && ismbblank (texta, lima - texta, &mblength_a)) -+ texta += mblength_a; -+ while (textb < limb && ismbblank (textb, limb - textb, &mblength_b)) -+ textb += mblength_b; -+ } -+ } -+ } -+ -+not_equal: -+ if (key && key->reverse) -+ return -diff; -+ else -+ return diff; -+} -+#endif -+ - /* Compare two lines A and B, returning negative, zero, or positive - depending on whether A compares less than, equal to, or greater than B. */ - -@@ -2755,7 +3398,7 @@ compare (struct line const *a, struct line const *b) - diff = - NONZERO (blen); - else if (blen == 0) - diff = 1; -- else if (hard_LC_COLLATE) -+ else if (hard_LC_COLLATE && !folding) - { - /* xmemcoll0 is a performance enhancement as - it will not unconditionally write '\0' after the -@@ -4145,6 +4788,7 @@ set_ordering (char const *s, struct keyfield *key, enum blanktype blanktype) - break; - case 'f': - key->translate = fold_toupper; -+ folding = true; - break; - case 'g': - key->general_numeric = true; -@@ -4224,7 +4868,7 @@ main (int argc, char **argv) - initialize_exit_failure (SORT_FAILURE); - - hard_LC_COLLATE = hard_locale (LC_COLLATE); --#if HAVE_NL_LANGINFO -+#if HAVE_LANGINFO_CODESET - hard_LC_TIME = hard_locale (LC_TIME); - #endif - -@@ -4245,6 +4889,29 @@ main (int argc, char **argv) - thousands_sep = -1; - } - -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ inittables = inittables_mb; -+ begfield = begfield_mb; -+ limfield = limfield_mb; -+ skipblanks = skipblanks_mb; -+ getmonth = getmonth_mb; -+ keycompare = keycompare_mb; -+ numcompare = numcompare_mb; -+ } -+ else -+#endif -+ { -+ inittables = inittables_uni; -+ begfield = begfield_uni; -+ limfield = limfield_uni; -+ skipblanks = skipblanks_uni; -+ getmonth = getmonth_uni; -+ keycompare = keycompare_uni; -+ numcompare = numcompare_uni; -+ } -+ - have_read_stdin = false; - inittables (); - -@@ -4519,13 +5186,34 @@ main (int argc, char **argv) - - case 't': - { -- char newtab = optarg[0]; -- if (! newtab) -+ char newtab[MB_LEN_MAX + 1]; -+ size_t newtab_length = 1; -+ strncpy (newtab, optarg, MB_LEN_MAX); -+ if (! newtab[0]) - die (SORT_FAILURE, 0, _("empty tab")); -- if (optarg[1]) -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ wchar_t wc; -+ mbstate_t state; -+ -+ memset (&state, '\0', sizeof (mbstate_t)); -+ newtab_length = mbrtowc (&wc, newtab, strnlen (newtab, -+ MB_LEN_MAX), -+ &state); -+ switch (newtab_length) -+ { -+ case (size_t) -1: -+ case (size_t) -2: -+ case 0: -+ newtab_length = 1; -+ } -+ } -+#endif -+ if (newtab_length == 1 && optarg[1]) - { - if (STREQ (optarg, "\\0")) -- newtab = '\0'; -+ newtab[0] = '\0'; - else - { - /* Provoke with 'sort -txx'. Complain about -@@ -4536,9 +5224,11 @@ main (int argc, char **argv) - quote (optarg)); - } - } -- if (tab != TAB_DEFAULT && tab != newtab) -+ if (tab_length && (tab_length != newtab_length -+ || memcmp (tab, newtab, tab_length) != 0)) - die (SORT_FAILURE, 0, _("incompatible tabs")); -- tab = newtab; -+ memcpy (tab, newtab, newtab_length); -+ tab_length = newtab_length; - } - break; - -@@ -4767,12 +5457,10 @@ main (int argc, char **argv) - sort (files, nfiles, outfile, nthreads); - } - --#ifdef lint - if (files_from) - readtokens0_free (&tok); - else - free (files); --#endif - - if (have_read_stdin && fclose (stdin) == EOF) - sort_die (_("close failed"), "-"); -diff --git a/src/unexpand.c b/src/unexpand.c -index 09dafdb..7d5dd64 100644 ---- a/src/unexpand.c -+++ b/src/unexpand.c -@@ -38,6 +38,9 @@ - #include - #include - #include -+ -+#include -+ - #include "system.h" - #include "die.h" - #include "xstrndup.h" -@@ -107,24 +110,47 @@ unexpand (void) - { - /* Input stream. */ - FILE *fp = next_file (NULL); -+ mb_file_t mbf; - - /* The array of pending blanks. In non-POSIX locales, blanks can - include characters other than spaces, so the blanks must be - stored, not merely counted. */ -- char *pending_blank; -+ mbf_char_t *pending_blank; -+ /* True if the starting locale is utf8. */ -+ bool using_utf_locale; -+ -+ /* True if the first file contains BOM header. */ -+ bool found_bom; -+ using_utf_locale=check_utf_locale(); - - if (!fp) - return; -+ mbf_init (mbf, fp); -+ found_bom=check_bom(fp,&mbf); -+ -+ if (using_utf_locale == false && found_bom == true) -+ { -+ /*try using some predefined locale */ - -+ if (set_utf_locale () != 0) -+ { -+ error (EXIT_FAILURE, errno, _("cannot set UTF-8 locale")); -+ } -+ } - /* The worst case is a non-blank character, then one blank, then a - tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so - allocate MAX_COLUMN_WIDTH bytes to store the blanks. */ -- pending_blank = xmalloc (max_column_width); -+ pending_blank = xmalloc (max_column_width * sizeof (mbf_char_t)); -+ -+ if (found_bom == true) -+ { -+ print_bom(); -+ } - - while (true) - { - /* Input character, or EOF. */ -- int c; -+ mbf_char_t c; - - /* If true, perform translations. */ - bool convert = true; -@@ -158,12 +184,44 @@ unexpand (void) - - do - { -- while ((c = getc (fp)) < 0 && (fp = next_file (fp))) -- continue; -+ while (true) { -+ mbf_getc (c, mbf); -+ if ((mb_iseof (c)) && (fp = next_file (fp))) -+ { -+ mbf_init (mbf, fp); -+ if (fp!=NULL) -+ { -+ if (check_bom(fp,&mbf)==true) -+ { -+ /*Not the first file - check BOM header*/ -+ if (using_utf_locale==false && found_bom==false) -+ { -+ /*BOM header in subsequent file but not in the first one. */ -+ error (EXIT_FAILURE, errno, _("combination of files with and without BOM header")); -+ } -+ } -+ else -+ { -+ if(using_utf_locale==false && found_bom==true) -+ { -+ /*First file conatined BOM header - locale was switched to UTF -+ *all subsequent files should contain BOM. */ -+ error (EXIT_FAILURE, errno, _("combination of files with and without BOM header")); -+ } -+ } -+ } -+ continue; -+ } -+ else -+ { -+ break; -+ } -+ } -+ - - if (convert) - { -- bool blank = !! isblank (c); -+ bool blank = mb_isblank (c); - - if (blank) - { -@@ -180,16 +238,16 @@ unexpand (void) - if (next_tab_column < column) - die (EXIT_FAILURE, 0, _("input line is too long")); - -- if (c == '\t') -+ if (mb_iseq (c, '\t')) - { - column = next_tab_column; - - if (pending) -- pending_blank[0] = '\t'; -+ mb_setascii (&pending_blank[0], '\t'); - } - else - { -- column++; -+ column += mb_width (c); - - if (! (prev_blank && column == next_tab_column)) - { -@@ -197,13 +255,14 @@ unexpand (void) - will be replaced by tabs. */ - if (column == next_tab_column) - one_blank_before_tab_stop = true; -- pending_blank[pending++] = c; -+ mb_copy (&pending_blank[pending++], &c); - prev_blank = true; - continue; - } - - /* Replace the pending blanks by a tab or two. */ -- pending_blank[0] = c = '\t'; -+ mb_setascii (&c, '\t'); -+ mb_setascii (&pending_blank[0], '\t'); - } - - /* Discard pending blanks, unless it was a single -@@ -211,7 +270,7 @@ unexpand (void) - pending = one_blank_before_tab_stop; - } - } -- else if (c == '\b') -+ else if (mb_iseq (c, '\b')) - { - /* Go back one column, and force recalculation of the - next tab stop. */ -@@ -219,9 +278,9 @@ unexpand (void) - next_tab_column = column; - tab_index -= !!tab_index; - } -- else -+ else if (!mb_iseq (c, '\n')) - { -- column++; -+ column += mb_width (c); - if (!column) - die (EXIT_FAILURE, 0, _("input line is too long")); - } -@@ -229,8 +288,11 @@ unexpand (void) - if (pending) - { - if (pending > 1 && one_blank_before_tab_stop) -- pending_blank[0] = '\t'; -- if (fwrite (pending_blank, 1, pending, stdout) != pending) -+ mb_setascii (&pending_blank[0], '\t'); -+ -+ for (int n = 0; n < pending; ++n) -+ mb_putc (pending_blank[n], stdout); -+ if (ferror (stdout)) - die (EXIT_FAILURE, errno, _("write error")); - pending = 0; - one_blank_before_tab_stop = false; -@@ -240,16 +302,17 @@ unexpand (void) - convert &= convert_entire_line || blank; - } - -- if (c < 0) -+ if (mb_iseof (c)) - { - free (pending_blank); - return; - } - -- if (putchar (c) < 0) -+ mb_putc (c, stdout); -+ if (ferror (stdout)) - die (EXIT_FAILURE, errno, _("write error")); - } -- while (c != '\n'); -+ while (!mb_iseq (c, '\n')); - } - } - -diff --git a/src/uniq.c b/src/uniq.c -index e024757..534231f 100644 ---- a/src/uniq.c -+++ b/src/uniq.c -@@ -21,6 +21,17 @@ - #include - #include - -+/* Get mbstate_t, mbrtowc(). */ -+#if HAVE_WCHAR_H -+# include -+#endif -+ -+/* Get isw* functions. */ -+#if HAVE_WCTYPE_H -+# include -+#endif -+#include -+ - #include "system.h" - #include "argmatch.h" - #include "linebuffer.h" -@@ -33,6 +44,18 @@ - #include "memcasecmp.h" - #include "quote.h" - -+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC -+ installation; work around this configuration error. */ -+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 -+# define MB_LEN_MAX 16 -+#endif -+ -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif -+ -+ - /* The official name of this program (e.g., no 'g' prefix). */ - #define PROGRAM_NAME "uniq" - -@@ -139,6 +162,10 @@ enum - GROUP_OPTION = CHAR_MAX + 1 - }; - -+/* Function pointers. */ -+static char * -+(*find_field) (struct linebuffer *line); -+ - static struct option const longopts[] = - { - {"count", no_argument, NULL, 'c'}, -@@ -253,7 +280,7 @@ size_opt (char const *opt, char const *msgid) - return a pointer to the beginning of the line's field to be compared. */ - - static char * _GL_ATTRIBUTE_PURE --find_field (struct linebuffer const *line) -+find_field_uni (struct linebuffer *line) - { - size_t count; - char const *lp = line->buffer; -@@ -273,6 +300,83 @@ find_field (struct linebuffer const *line) - return line->buffer + i; - } - -+#if HAVE_MBRTOWC -+ -+# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \ -+ do \ -+ { \ -+ mbstate_t state_bak; \ -+ \ -+ CONVFAIL = 0; \ -+ state_bak = *STATEP; \ -+ \ -+ MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \ -+ \ -+ switch (MBLENGTH) \ -+ { \ -+ case (size_t)-2: \ -+ case (size_t)-1: \ -+ *STATEP = state_bak; \ -+ CONVFAIL++; \ -+ /* Fall through */ \ -+ case 0: \ -+ MBLENGTH = 1; \ -+ } \ -+ } \ -+ while (0) -+ -+static char * -+find_field_multi (struct linebuffer *line) -+{ -+ size_t count; -+ char *lp = line->buffer; -+ size_t size = line->length - 1; -+ size_t pos; -+ size_t mblength; -+ wchar_t wc; -+ mbstate_t *statep; -+ int convfail = 0; -+ -+ pos = 0; -+ statep = &(line->state); -+ -+ /* skip fields. */ -+ for (count = 0; count < skip_fields && pos < size; count++) -+ { -+ while (pos < size) -+ { -+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); -+ -+ if (convfail || !(iswblank (wc) || wc == '\n')) -+ { -+ pos += mblength; -+ break; -+ } -+ pos += mblength; -+ } -+ -+ while (pos < size) -+ { -+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); -+ -+ if (!convfail && (iswblank (wc) || wc == '\n')) -+ break; -+ -+ pos += mblength; -+ } -+ } -+ -+ /* skip fields. */ -+ for (count = 0; count < skip_chars && pos < size; count++) -+ { -+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); -+ pos += mblength; -+ } -+ -+ return lp + pos; -+} -+#endif -+ - /* Return false if two strings OLD and NEW match, true if not. - OLD and NEW point not to the beginnings of the lines - but rather to the beginnings of the fields to compare. -@@ -493,6 +597,19 @@ main (int argc, char **argv) - - atexit (close_stdout); - -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ find_field = find_field_multi; -+ } -+ else -+#endif -+ { -+ find_field = find_field_uni; -+ } -+ -+ -+ - skip_chars = 0; - skip_fields = 0; - check_chars = SIZE_MAX; -diff --git a/tests/expand/mb.sh b/tests/expand/mb.sh -new file mode 100755 -index 0000000..dd6007c ---- /dev/null -+++ b/tests/expand/mb.sh -@@ -0,0 +1,183 @@ -+#!/bin/sh -+ -+# Copyright (C) 2012-2015 Free Software Foundation, Inc. -+ -+# This program is free software: you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation, either version 3 of the License, or -+# (at your option) any later version. -+ -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+ -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+ -+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src -+print_ver_ expand -+ -+export LC_ALL=en_US.UTF-8 -+ -+#input containing multibyte characters -+cat <<\EOF > in || framework_failure_ -+1234567812345678123456781 -+. . . . -+a b c d -+. . . . -+ä ö ü ß -+. . . . -+EOF -+env printf ' äöü\t. öüä. \tä xx\n' >> in || framework_failure_ -+ -+cat <<\EOF > exp || framework_failure_ -+1234567812345678123456781 -+. . . . -+a b c d -+. . . . -+ä ö ü ß -+. . . . -+ äöü . öüä. ä xx -+EOF -+ -+expand < in > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 -+ -+#multiple files as an input -+cat <<\EOF >> exp || framework_failure_ -+1234567812345678123456781 -+. . . . -+a b c d -+. . . . -+ä ö ü ß -+. . . . -+ äöü . öüä. ä xx -+EOF -+ -+expand ./in ./in > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 -+ -+#test characters with display widths != 1 -+env printf '12345678 -+e\t|ascii(1) -+\u00E9\t|composed(1) -+e\u0301\t|decomposed(1) -+\u3000\t|ideo-space(2) -+\uFF0D\t|full-hypen(2) -+' > in || framework_failure_ -+ -+env printf '12345678 -+e |ascii(1) -+\u00E9 |composed(1) -+e\u0301 |decomposed(1) -+\u3000 |ideo-space(2) -+\uFF0D |full-hypen(2) -+' > exp || framework_failure_ -+ -+expand < in > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 -+ -+#shouldn't fail with "input line too long" -+#when a line starts with a control character -+env printf '\n' > in || framework_failure_ -+ -+expand < in > out || fail=1 -+compare in out > /dev/null 2>&1 || fail=1 -+ -+#non-Unicode characters interspersed between Unicode ones -+env printf '12345678 -+\t\xFF| -+\xFF\t| -+\t\xFFä| -+ä\xFF\t| -+\tä\xFF| -+\xFF\tä| -+äbcdef\xFF\t| -+' > in || framework_failure_ -+ -+env printf '12345678 -+ \xFF| -+\xFF | -+ \xFFä| -+ä\xFF | -+ ä\xFF| -+\xFF ä| -+äbcdef\xFF | -+' > exp || framework_failure_ -+ -+expand < in > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 -+ -+ -+ -+#BOM header test 1 -+printf "\xEF\xBB\xBF" > in; cat <<\EOF >> in || framework_failure_ -+1234567812345678123456781 -+. . . . -+a b c d -+. . . . -+ä ö ü ß -+. . . . -+EOF -+env printf ' äöü\t. öüä. \tä xx\n' >> in || framework_failure_ -+ -+printf "\xEF\xBB\xBF" > exp; cat <<\EOF >> exp || framework_failure_ -+1234567812345678123456781 -+. . . . -+a b c d -+. . . . -+ä ö ü ß -+. . . . -+ äöü . öüä. ä xx -+EOF -+ -+ -+expand < in > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 -+ -+LANG=C expand < in > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 -+ -+LC_ALL=C expand < in > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 -+ -+ -+printf '\xEF\xBB\xBF' > in1; cat <<\EOF >> in1 || framework_failure_ -+1234567812345678123456781 -+. . . . -+a b c d -+. . . . -+ä ö ü ß -+. . . . -+EOF -+env printf ' äöü\t. öüä. \tä xx\n' >> in1 || framework_failure_ -+ -+ -+printf '\xEF\xBB\xBF' > exp; cat <<\EOF >> exp || framework_failure_ -+1234567812345678123456781 -+. . . . -+a b c d -+. . . . -+ä ö ü ß -+. . . . -+ äöü . öüä. ä xx -+1234567812345678123456781 -+. . . . -+a b c d -+. . . . -+ä ö ü ß -+. . . . -+ äöü . öüä. ä xx -+EOF -+ -+expand in1 in1 > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 -+ -+LANG=C expand in1 in1 > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 -+ -+LC_ALL=C expand in1 in1 > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 -+ -+exit $fail -diff --git a/tests/i18n/sort.sh b/tests/i18n/sort.sh -new file mode 100755 -index 0000000..26c95de ---- /dev/null -+++ b/tests/i18n/sort.sh -@@ -0,0 +1,29 @@ -+#!/bin/sh -+# Verify sort's multi-byte support. -+ -+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src -+print_ver_ sort -+ -+export LC_ALL=en_US.UTF-8 -+locale -k LC_CTYPE | grep -q "charmap.*UTF-8" \ -+ || skip_ "No UTF-8 locale available" -+ -+# Enable heap consistency checkng on older systems -+export MALLOC_CHECK_=2 -+ -+ -+# check buffer overflow issue due to -+# expanding multi-byte representation due to case conversion -+# https://bugzilla.suse.com/show_bug.cgi?id=928749 -+cat < exp -+. -+ɑ -+EOF -+cat < out || fail=1 -+. -+ɑ -+EOF -+compare exp out || { fail=1; cat out; } -+ -+ -+Exit $fail -diff --git a/tests/local.mk b/tests/local.mk -index 594a9d0..5f7f775 100644 ---- a/tests/local.mk -+++ b/tests/local.mk -@@ -369,6 +369,8 @@ all_tests = \ - tests/misc/sort-discrim.sh \ - tests/misc/sort-files0-from.pl \ - tests/misc/sort-float.sh \ -+ tests/misc/sort-mb-tests.sh \ -+ tests/i18n/sort.sh \ - tests/misc/sort-h-thousands-sep.sh \ - tests/misc/sort-merge.pl \ - tests/misc/sort-merge-fdlimit.sh \ -@@ -567,6 +569,7 @@ all_tests = \ - tests/du/threshold.sh \ - tests/du/trailing-slash.sh \ - tests/du/two-args.sh \ -+ tests/expand/mb.sh \ - tests/id/gnu-zero-uids.sh \ - tests/id/no-context.sh \ - tests/id/context.sh \ -@@ -714,6 +717,7 @@ all_tests = \ - tests/touch/read-only.sh \ - tests/touch/relative.sh \ - tests/touch/trailing-slash.sh \ -+ tests/unexpand/mb.sh \ - $(all_root_tests) - - # See tests/factor/create-test.sh. -diff --git a/tests/misc/expand.pl b/tests/misc/expand.pl -index 8e5beaf..040e321 100755 ---- a/tests/misc/expand.pl -+++ b/tests/misc/expand.pl -@@ -27,6 +27,15 @@ my $prog = 'expand'; - # Turn off localization of executable's output. - @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; - -+#comment out next line to disable multibyte tests -+my $mb_locale = $ENV{LOCALE_FR_UTF8}; -+! defined $mb_locale || $mb_locale eq 'none' -+ and $mb_locale = 'C'; -+ -+my $prog = 'expand'; -+my $try = "Try \`$prog --help' for more information.\n"; -+my $inval = "$prog: invalid byte, character or field list\n$try"; -+ - my @Tests = - ( - ['t1', '--tabs=3', {IN=>"a\tb"}, {OUT=>"a b"}], -@@ -168,6 +177,8 @@ my @Tests = - - - # Test errors -+ # FIXME: The following tests contain ‘quoting’ specific to LC_MESSAGES -+ # So we force LC_MESSAGES=C to make them pass. - ['e1', '--tabs="a"', {IN=>''}, {OUT=>''}, {EXIT=>1}, - {ERR => "$prog: tab size contains invalid character(s): 'a'\n"}], - ['e2', "-t $UINTMAX_OFLOW", {IN=>''}, {OUT=>''}, {EXIT=>1}, -@@ -184,6 +195,37 @@ my @Tests = - {ERR => "$prog: '/' specifier not at start of number: '/'\n"}], - ); - -+if ($mb_locale ne 'C') -+ { -+ # Duplicate each test vector, appending "-mb" to the test name and -+ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we -+ # provide coverage for the distro-added multi-byte code paths. -+ my @new; -+ foreach my $t (@Tests) -+ { -+ my @new_t = @$t; -+ my $test_name = shift @new_t; -+ -+ # Depending on whether expand is multi-byte-patched, -+ # it emits different diagnostics: -+ # non-MB: invalid byte or field list -+ # MB: invalid byte, character or field list -+ # Adjust the expected error output accordingly. -+ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} -+ (@new_t)) -+ { -+ my $sub = {ERR_SUBST => 's/, character//'}; -+ push @new_t, $sub; -+ push @$t, $sub; -+ } -+ push @new, ["$test_name-mb", @new_t, {ENV => "LANG=$mb_locale LC_MESSAGES=C"}]; -+ } -+ push @Tests, @new; -+ } -+ -+ -+@Tests = triple_test \@Tests; -+ - my $save_temps = $ENV{DEBUG}; - my $verbose = $ENV{VERBOSE}; - -diff --git a/tests/misc/fold.pl b/tests/misc/fold.pl -index 3a72629..af85ee5 100755 ---- a/tests/misc/fold.pl -+++ b/tests/misc/fold.pl -@@ -20,9 +20,18 @@ use strict; - - (my $program_name = $0) =~ s|.*/||; - -+my $prog = 'fold'; -+my $try = "Try \`$prog --help' for more information.\n"; -+my $inval = "$prog: invalid byte, character or field list\n$try"; -+ - # Turn off localization of executable's output. - @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; - -+# uncommented to enable multibyte paths -+my $mb_locale = $ENV{LOCALE_FR_UTF8}; -+! defined $mb_locale || $mb_locale eq 'none' -+ and $mb_locale = 'C'; -+ - my @Tests = - ( - ['s1', '-w2 -s', {IN=>"a\t"}, {OUT=>"a\n\t"}], -@@ -31,9 +40,48 @@ my @Tests = - ['s4', '-w4 -s', {IN=>"abc ef\n"}, {OUT=>"abc \nef\n"}], - ); - -+# Add _POSIX2_VERSION=199209 to the environment of each test -+# that uses an old-style option like +1. -+if ($mb_locale ne 'C') -+ { -+ # Duplicate each test vector, appending "-mb" to the test name and -+ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we -+ # provide coverage for the distro-added multi-byte code paths. -+ my @new; -+ foreach my $t (@Tests) -+ { -+ my @new_t = @$t; -+ my $test_name = shift @new_t; -+ -+ # Depending on whether fold is multi-byte-patched, -+ # it emits different diagnostics: -+ # non-MB: invalid byte or field list -+ # MB: invalid byte, character or field list -+ # Adjust the expected error output accordingly. -+ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} -+ (@new_t)) -+ { -+ my $sub = {ERR_SUBST => 's/, character//'}; -+ push @new_t, $sub; -+ push @$t, $sub; -+ } -+ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; -+ } -+ push @Tests, @new; -+ } -+ -+@Tests = triple_test \@Tests; -+ -+# Remember that triple_test creates from each test with exactly one "IN" -+# file two more tests (.p and .r suffix on name) corresponding to reading -+# input from a file and from a pipe. The pipe-reading test would fail -+# due to a race condition about 1 in 20 times. -+# Remove the IN_PIPE version of the "output-is-input" test above. -+# The others aren't susceptible because they have three inputs each. -+@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; -+ - my $save_temps = $ENV{DEBUG}; - my $verbose = $ENV{VERBOSE}; - --my $prog = 'fold'; - my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose); - exit $fail; -diff --git a/tests/misc/join.pl b/tests/misc/join.pl -index 8ab93ad..bdd3bc7 100755 ---- a/tests/misc/join.pl -+++ b/tests/misc/join.pl -@@ -25,6 +25,15 @@ my $limits = getlimits (); - - my $prog = 'join'; - -+my $try = "Try \`$prog --help' for more information.\n"; -+my $inval = "$prog: invalid byte, character or field list\n$try"; -+ -+my $mb_locale; -+#Comment out next line to disable multibyte tests -+$mb_locale = $ENV{LOCALE_FR_UTF8}; -+! defined $mb_locale || $mb_locale eq 'none' -+ and $mb_locale = 'C'; -+ - my $delim = chr 0247; - sub t_subst ($) - { -@@ -333,8 +342,49 @@ foreach my $t (@tv) - push @Tests, $new_ent; - } - -+# Add _POSIX2_VERSION=199209 to the environment of each test -+# that uses an old-style option like +1. -+if ($mb_locale ne 'C') -+ { -+ # Duplicate each test vector, appending "-mb" to the test name and -+ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we -+ # provide coverage for the distro-added multi-byte code paths. -+ my @new; -+ foreach my $t (@Tests) -+ { -+ my @new_t = @$t; -+ my $test_name = shift @new_t; -+ -+ # Depending on whether join is multi-byte-patched, -+ # it emits different diagnostics: -+ # non-MB: invalid byte or field list -+ # MB: invalid byte, character or field list -+ # Adjust the expected error output accordingly. -+ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} -+ (@new_t)) -+ { -+ my $sub = {ERR_SUBST => 's/, character//'}; -+ push @new_t, $sub; -+ push @$t, $sub; -+ } -+ #Adjust the output some error messages including test_name for mb -+ if (grep {ref $_ eq 'HASH' && exists $_->{ERR}} -+ (@new_t)) -+ { -+ my $sub2 = {ERR_SUBST => "s/$test_name-mb/$test_name/"}; -+ push @new_t, $sub2; -+ push @$t, $sub2; -+ } -+ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; -+ } -+ push @Tests, @new; -+ } -+ - @Tests = triple_test \@Tests; - -+#skip invalid-j-mb test, it is failing because of the format -+@Tests = grep {$_->[0] ne 'invalid-j-mb'} @Tests; -+ - my $save_temps = $ENV{DEBUG}; - my $verbose = $ENV{VERBOSE}; - -diff --git a/tests/misc/sort-mb-tests.sh b/tests/misc/sort-mb-tests.sh -new file mode 100755 -index 0000000..11836ba ---- /dev/null -+++ b/tests/misc/sort-mb-tests.sh -@@ -0,0 +1,45 @@ -+#!/bin/sh -+# Verify sort's multi-byte support. -+ -+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src -+print_ver_ sort -+ -+export LC_ALL=en_US.UTF-8 -+locale -k LC_CTYPE | grep -q "charmap.*UTF-8" \ -+ || skip_ "No UTF-8 locale available" -+ -+ -+cat < exp -+Banana@5 -+Apple@10 -+Citrus@20 -+Cherry@30 -+EOF -+ -+cat < out || fail=1 -+Apple@10 -+Banana@5 -+Citrus@20 -+Cherry@30 -+EOF -+ -+compare exp out || { fail=1; cat out; } -+ -+ -+cat < exp -+Citrus@AA20@@5 -+Cherry@AA30@@10 -+Apple@AA10@@20 -+Banana@AA5@@30 -+EOF -+ -+cat < out || fail=1 -+Apple@AA10@@20 -+Banana@AA5@@30 -+Citrus@AA20@@5 -+Cherry@AA30@@10 -+EOF -+ -+compare exp out || { fail=1; cat out; } -+ -+Exit $fail -diff --git a/tests/misc/sort-merge.pl b/tests/misc/sort-merge.pl -index 3a48580..4540d2f 100755 ---- a/tests/misc/sort-merge.pl -+++ b/tests/misc/sort-merge.pl -@@ -26,6 +26,15 @@ my $prog = 'sort'; - # Turn off localization of executable's output. - @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; - -+my $mb_locale; -+# uncommented according to upstream commit enabling multibyte paths -+$mb_locale = $ENV{LOCALE_FR_UTF8}; -+! defined $mb_locale || $mb_locale eq 'none' -+ and $mb_locale = 'C'; -+ -+my $try = "Try \`$prog --help' for more information.\n"; -+my $inval = "$prog: invalid byte, character or field list\n$try"; -+ - # three empty files and one that says 'foo' - my @inputs = (+(map{{IN=> {"empty$_"=> ''}}}1..3), {IN=> {foo=> "foo\n"}}); - -@@ -77,6 +86,39 @@ my @Tests = - {OUT=>$big_input}], - ); - -+# Add _POSIX2_VERSION=199209 to the environment of each test -+# that uses an old-style option like +1. -+if ($mb_locale ne 'C') -+ { -+ # Duplicate each test vector, appending "-mb" to the test name and -+ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we -+ # provide coverage for the distro-added multi-byte code paths. -+ my @new; -+ foreach my $t (@Tests) -+ { -+ my @new_t = @$t; -+ my $test_name = shift @new_t; -+ -+ # Depending on whether sort is multi-byte-patched, -+ # it emits different diagnostics: -+ # non-MB: invalid byte or field list -+ # MB: invalid byte, character or field list -+ # Adjust the expected error output accordingly. -+ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} -+ (@new_t)) -+ { -+ my $sub = {ERR_SUBST => 's/, character//'}; -+ push @new_t, $sub; -+ push @$t, $sub; -+ } -+ next if ($test_name =~ "nmerge-."); -+ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; -+ } -+ push @Tests, @new; -+ } -+ -+@Tests = triple_test \@Tests; -+ - my $save_temps = $ENV{DEBUG}; - my $verbose = $ENV{VERBOSE}; - -diff --git a/tests/misc/sort.pl b/tests/misc/sort.pl -index 081618d..af217b4 100755 ---- a/tests/misc/sort.pl -+++ b/tests/misc/sort.pl -@@ -24,10 +24,15 @@ my $prog = 'sort'; - # Turn off localization of executable's output. - @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; - --my $mb_locale = $ENV{LOCALE_FR_UTF8}; -+my $mb_locale; -+#Comment out next line to disable multibyte tests -+$mb_locale = $ENV{LOCALE_FR_UTF8}; - ! defined $mb_locale || $mb_locale eq 'none' - and $mb_locale = 'C'; - -+my $try = "Try \`$prog --help' for more information.\n"; -+my $inval = "$prog: invalid byte, character or field list\n$try"; -+ - # Since each test is run with a file name and with redirected stdin, - # the name in the diagnostic is either the file name or "-". - # Normalize each diagnostic to use '-'. -@@ -423,6 +428,38 @@ foreach my $t (@Tests) - } - } - -+if ($mb_locale ne 'C') -+ { -+ # Duplicate each test vector, appending "-mb" to the test name and -+ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we -+ # provide coverage for the distro-added multi-byte code paths. -+ my @new; -+ foreach my $t (@Tests) -+ { -+ my @new_t = @$t; -+ my $test_name = shift @new_t; -+ -+ # Depending on whether sort is multi-byte-patched, -+ # it emits different diagnostics: -+ # non-MB: invalid byte or field list -+ # MB: invalid byte, character or field list -+ # Adjust the expected error output accordingly. -+ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} -+ (@new_t)) -+ { -+ my $sub = {ERR_SUBST => 's/, character//'}; -+ push @new_t, $sub; -+ push @$t, $sub; -+ } -+ #disable several failing tests until investigation, disable all tests with envvars set -+ next if (grep {ref $_ eq 'HASH' && exists $_->{ENV}} (@new_t)); -+ next if ($test_name =~ "18g" or $test_name =~ "sort-numeric" or $test_name =~ "08[ab]" or $test_name =~ "03[def]" or $test_name =~ "h4" or $test_name =~ "n1" or $test_name =~ "2[01]a"); -+ next if ($test_name =~ "11[ab]"); # avoid FP: expected result differs to MB result due to collation rules. -+ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; -+ } -+ push @Tests, @new; -+ } -+ - @Tests = triple_test \@Tests; - - # Remember that triple_test creates from each test with exactly one "IN" -@@ -432,6 +469,7 @@ foreach my $t (@Tests) - # Remove the IN_PIPE version of the "output-is-input" test above. - # The others aren't susceptible because they have three inputs each. - @Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; -+@Tests = grep {$_->[0] ne 'output-is-input-mb.p'} @Tests; - - my $save_temps = $ENV{DEBUG}; - my $verbose = $ENV{VERBOSE}; -diff --git a/tests/misc/unexpand.pl b/tests/misc/unexpand.pl -index c8f5ff9..518e820 100755 ---- a/tests/misc/unexpand.pl -+++ b/tests/misc/unexpand.pl -@@ -27,6 +27,14 @@ my $limits = getlimits (); - - my $prog = 'unexpand'; - -+# comment out next line to disable multibyte tests -+my $mb_locale = $ENV{LOCALE_FR_UTF8}; -+! defined $mb_locale || $mb_locale eq 'none' -+ and $mb_locale = 'C'; -+ -+my $try = "Try \`$prog --help' for more information.\n"; -+my $inval = "$prog: invalid byte, character or field list\n$try"; -+ - my @Tests = - ( - ['a1', {IN=> ' 'x 1 ."y\n"}, {OUT=> ' 'x 1 ."y\n"}], -@@ -128,6 +136,37 @@ my @Tests = - ['ts2', '-t5,8', {IN=>"x\t \t y\n"}, {OUT=>"x\t\t y\n"}], - ); - -+if ($mb_locale ne 'C') -+ { -+ # Duplicate each test vector, appending "-mb" to the test name and -+ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we -+ # provide coverage for the distro-added multi-byte code paths. -+ my @new; -+ foreach my $t (@Tests) -+ { -+ my @new_t = @$t; -+ my $test_name = shift @new_t; -+ -+ # Depending on whether unexpand is multi-byte-patched, -+ # it emits different diagnostics: -+ # non-MB: invalid byte or field list -+ # MB: invalid byte, character or field list -+ # Adjust the expected error output accordingly. -+ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} -+ (@new_t)) -+ { -+ my $sub = {ERR_SUBST => 's/, character//'}; -+ push @new_t, $sub; -+ push @$t, $sub; -+ } -+ next if ($test_name =~ 'b-1'); -+ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; -+ } -+ push @Tests, @new; -+ } -+ -+@Tests = triple_test \@Tests; -+ - my $save_temps = $ENV{DEBUG}; - my $verbose = $ENV{VERBOSE}; - -diff --git a/tests/misc/uniq.pl b/tests/misc/uniq.pl -index 6f332c8..ce49925 100755 ---- a/tests/misc/uniq.pl -+++ b/tests/misc/uniq.pl -@@ -23,9 +23,17 @@ my $limits = getlimits (); - my $prog = 'uniq'; - my $try = "Try '$prog --help' for more information.\n"; - -+my $inval = "$prog: invalid byte, character or field list\n$try"; -+ - # Turn off localization of executable's output. - @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; - -+my $mb_locale; -+#Comment out next line to disable multibyte tests -+$mb_locale = $ENV{LOCALE_FR_UTF8}; -+! defined $mb_locale || $mb_locale eq 'none' -+ and $mb_locale = 'C'; -+ - # When possible, create a "-z"-testing variant of each test. - sub add_z_variants($) - { -@@ -262,6 +270,53 @@ foreach my $t (@Tests) - and push @$t, {ENV=>'_POSIX2_VERSION=199209'}; - } - -+if ($mb_locale ne 'C') -+ { -+ # Duplicate each test vector, appending "-mb" to the test name and -+ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we -+ # provide coverage for the distro-added multi-byte code paths. -+ my @new; -+ foreach my $t (@Tests) -+ { -+ my @new_t = @$t; -+ my $test_name = shift @new_t; -+ -+ # Depending on whether uniq is multi-byte-patched, -+ # it emits different diagnostics: -+ # non-MB: invalid byte or field list -+ # MB: invalid byte, character or field list -+ # Adjust the expected error output accordingly. -+ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} -+ (@new_t)) -+ { -+ my $sub = {ERR_SUBST => 's/, character//'}; -+ push @new_t, $sub; -+ push @$t, $sub; -+ } -+ # In test #145, replace the each ‘...’ by '...'. -+ if ($test_name =~ "145") -+ { -+ my $sub = { ERR_SUBST => "s/‘([^’]+)’/'\$1'/g"}; -+ push @new_t, $sub; -+ push @$t, $sub; -+ } -+ next if ( $test_name =~ "schar" -+ or $test_name =~ "^obs-plus" -+ or $test_name =~ "119"); -+ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; -+ } -+ push @Tests, @new; -+ } -+ -+# Remember that triple_test creates from each test with exactly one "IN" -+# file two more tests (.p and .r suffix on name) corresponding to reading -+# input from a file and from a pipe. The pipe-reading test would fail -+# due to a race condition about 1 in 20 times. -+# Remove the IN_PIPE version of the "output-is-input" test above. -+# The others aren't susceptible because they have three inputs each. -+ -+@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; -+ - @Tests = add_z_variants \@Tests; - @Tests = triple_test \@Tests; - -diff --git a/tests/pr/pr-tests.pl b/tests/pr/pr-tests.pl -index 2720369..8427f2b 100755 ---- a/tests/pr/pr-tests.pl -+++ b/tests/pr/pr-tests.pl -@@ -24,6 +24,15 @@ use strict; - my $prog = 'pr'; - my $normalize_strerror = "s/': .*/'/"; - -+my $mb_locale; -+#Uncomment the following line to enable multibyte tests -+$mb_locale = $ENV{LOCALE_FR_UTF8}; -+! defined $mb_locale || $mb_locale eq 'none' -+ and $mb_locale = 'C'; -+ -+my $try = "Try \`$prog --help' for more information.\n"; -+my $inval = "$prog: invalid byte, character or field list\n$try"; -+ - my @tv = ( - - # -b option is no longer an official option. But it's still working to -@@ -474,8 +483,48 @@ push @Tests, - {IN=>{2=>"a\n"}}, - {OUT=>"a\t\t\t\t \t\t\ta\n"} ]; - -+# Add _POSIX2_VERSION=199209 to the environment of each test -+# that uses an old-style option like +1. -+if ($mb_locale ne 'C') -+ { -+ # Duplicate each test vector, appending "-mb" to the test name and -+ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we -+ # provide coverage for the distro-added multi-byte code paths. -+ my @new; -+ foreach my $t (@Tests) -+ { -+ my @new_t = @$t; -+ my $test_name = shift @new_t; -+ -+ # Depending on whether pr is multi-byte-patched, -+ # it emits different diagnostics: -+ # non-MB: invalid byte or field list -+ # MB: invalid byte, character or field list -+ # Adjust the expected error output accordingly. -+ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} -+ (@new_t)) -+ { -+ my $sub = {ERR_SUBST => 's/, character//'}; -+ push @new_t, $sub; -+ push @$t, $sub; -+ } -+ #temporarily skip some failing tests -+ next if ($test_name =~ "col-0" or $test_name =~ "col-inval" or $test_name =~ "asan1"); -+ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; -+ } -+ push @Tests, @new; -+ } -+ - @Tests = triple_test \@Tests; - -+# Remember that triple_test creates from each test with exactly one "IN" -+# file two more tests (.p and .r suffix on name) corresponding to reading -+# input from a file and from a pipe. The pipe-reading test would fail -+# due to a race condition about 1 in 20 times. -+# Remove the IN_PIPE version of the "output-is-input" test above. -+# The others aren't susceptible because they have three inputs each. -+@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; -+ - my $save_temps = $ENV{DEBUG}; - my $verbose = $ENV{VERBOSE}; - -diff --git a/tests/unexpand/mb.sh b/tests/unexpand/mb.sh -new file mode 100755 -index 0000000..8a82d74 ---- /dev/null -+++ b/tests/unexpand/mb.sh -@@ -0,0 +1,172 @@ -+#!/bin/sh -+ -+# Copyright (C) 2012-2015 Free Software Foundation, Inc. -+ -+# This program is free software: you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation, either version 3 of the License, or -+# (at your option) any later version. -+ -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+ -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+ -+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src -+print_ver_ unexpand -+ -+export LC_ALL=en_US.UTF-8 -+ -+#input containing multibyte characters -+cat > in <<\EOF -+1234567812345678123456781 -+. . . . -+a b c d -+. . . . -+ä ö ü ß -+. . . . -+ äöü . öüä. ä xx -+EOF -+ -+cat > exp <<\EOF -+1234567812345678123456781 -+. . . . -+a b c d -+. . . . -+ä ö ü ß -+. . . . -+ äöü . öüä. ä xx -+EOF -+ -+unexpand -a < in > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 -+ -+ -+#multiple files as an input -+cat >> exp <<\EOF -+1234567812345678123456781 -+. . . . -+a b c d -+. . . . -+ä ö ü ß -+. . . . -+ äöü . öüä. ä xx -+EOF -+ -+ -+unexpand -a ./in ./in > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 -+ -+#test characters with a display width larger than 1 -+ -+env printf '12345678 -+e |ascii(1) -+\u00E9 |composed(1) -+e\u0301 |decomposed(1) -+\u3000 |ideo-space(2) -+\uFF0D |full-hypen(2) -+' > in || framework_failure_ -+ -+env printf '12345678 -+e\t|ascii(1) -+\u00E9\t|composed(1) -+e\u0301\t|decomposed(1) -+\u3000\t|ideo-space(2) -+\uFF0D\t|full-hypen(2) -+' > exp || framework_failure_ -+ -+unexpand -a < in > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 -+ -+#test input where a blank of width > 1 is not being substituted -+in="$(LC_ALL=en_US.UTF-8 printf ' \u3000 ö ü ß')" -+exp='   ö ü ß' -+ -+unexpand -a < in > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 -+ -+#non-Unicode characters interspersed between Unicode ones -+env printf '12345678 -+ \xFF| -+\xFF | -+ \xFFä| -+ä\xFF | -+ ä\xFF| -+\xFF ä| -+äbcdef\xFF | -+' > in || framework_failure_ -+ -+env printf '12345678 -+\t\xFF| -+\xFF\t| -+\t\xFFä| -+ä\xFF\t| -+\tä\xFF| -+\xFF\tä| -+äbcdef\xFF\t| -+' > exp || framework_failure_ -+ -+unexpand -a < in > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 -+ -+#BOM header test 1 -+printf "\xEF\xBB\xBF" > in; cat <<\EOF >> in || framework_failure_ -+1234567812345678123456781 -+. . . . -+a b c d -+. . . . -+ä ö ü ß -+. . . . -+ äöü . öüä. ä xx -+EOF -+env printf ' äöü\t. öüä. \tä xx\n' >> in || framework_failure_ -+ -+printf "\xEF\xBB\xBF" > exp; cat <<\EOF >> exp || framework_failure_ -+1234567812345678123456781 -+. . . . -+a b c d -+. . . . -+ä ö ü ß -+. . . . -+ äöü . öüä. ä xx -+EOF -+ -+unexpand < in > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 -+ -+LANG=C unexpand < in > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 -+ -+LC_ALL=C unexpand < in > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 -+ -+ -+printf "\xEF\xBB\xBF" > exp; cat <<\EOF >> exp || framework_failure_ -+1234567812345678123456781 -+. . . . -+a b c d -+. . . . -+ä ö ü ß -+. . . . -+ äöü . öüä. ä xx -+1234567812345678123456781 -+. . . . -+a b c d -+. . . . -+ä ö ü ß -+. . . . -+ äöü . öüä. ä xx -+EOF -+ -+ -+unexpand in in > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 -+ -+LANG=C unexpand in in > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 -+ -+LC_ALL=C unexpand in in > out || fail=1 -+compare exp out > /dev/null 2>&1 || fail=1 --- -2.23.0 - diff --git a/backport-ls-fix-crash-printing-SELinux-context-for-unstatable.patch b/backport-ls-fix-crash-printing-SELinux-context-for-unstatable.patch deleted file mode 100644 index 6ef657af6494c42c6ec173e9ea7f9b39bf2dfc04..0000000000000000000000000000000000000000 --- a/backport-ls-fix-crash-printing-SELinux-context-for-unstatable.patch +++ /dev/null @@ -1,126 +0,0 @@ -From 6fc695cb4a26f09dfeef8b1c24895a707055334e Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?P=C3=A1draig=20Brady?= -Date: Wed, 11 Nov 2020 17:22:33 +0000 -Subject: [PATCH] ls: fix crash printing SELinux context for unstatable files - -This crash was identified by Cyber Independent Testing Lab: -https://cyber-itl.org/2020/10/28/citl-7000-defects.html -and was introduced with commit v6.9.90-11-g4245876e2 - -* src/ls.c (gobble_file): Ensure scontext is initialized -in the case where files are not statable. -* tests/ls/selinux-segfault.sh: Renamed from proc-selinux-segfault.sh, -and added test case for broken symlinks. -* tests/local.mk: Adjust for the renamed test. -* NEWS: Mention the bug fix. ---- - src/ls.c | 3 +++ - tests/local.mk | 2 +- - tests/ls/proc-selinux-segfault.sh | 27 --------------------------- - tests/ls/selinux-segfault.sh | 33 +++++++++++++++++++++++++++++++++ - 4 files changed, 40 insertions(+), 28 deletions(-) - delete mode 100755 tests/ls/proc-selinux-segfault.sh - create mode 100755 tests/ls/selinux-segfault.sh - -diff --git a/src/ls.c b/src/ls.c -index 1f6afbc..1b4834c 100644 ---- a/src/ls.c -+++ b/src/ls.c -@@ -3424,6 +3424,9 @@ gobble_file (char const *name, enum filetype type, ino_t inode, - provokes an exit status of 1. */ - file_failure (command_line_arg, - _("cannot access %s"), full_name); -+ -+ f->scontext = UNKNOWN_SECURITY_CONTEXT; -+ - if (command_line_arg) - return 0; - -diff --git a/tests/local.mk b/tests/local.mk -index 7992003..e1c4675 100644 ---- a/tests/local.mk -+++ b/tests/local.mk -@@ -613,7 +613,7 @@ all_tests = \ - tests/ls/multihardlink.sh \ - tests/ls/no-arg.sh \ - tests/ls/no-cap.sh \ -- tests/ls/proc-selinux-segfault.sh \ -+ tests/ls/selinux-segfault.sh \ - tests/ls/quote-align.sh \ - tests/ls/readdir-mountpoint-inode.sh \ - tests/ls/recursive.sh \ -diff --git a/tests/ls/proc-selinux-segfault.sh b/tests/ls/proc-selinux-segfault.sh -deleted file mode 100755 -index 831a00e..0000000 ---- a/tests/ls/proc-selinux-segfault.sh -+++ /dev/null -@@ -1,27 +0,0 @@ --#!/bin/sh --# ls -l /proc/sys would segfault when built against libselinux1 2.0.15-2+b1 -- --# Copyright (C) 2008-2020 Free Software Foundation, Inc. -- --# This program is free software: you can redistribute it and/or modify --# it under the terms of the GNU General Public License as published by --# the Free Software Foundation, either version 3 of the License, or --# (at your option) any later version. -- --# This program is distributed in the hope that it will be useful, --# but WITHOUT ANY WARRANTY; without even the implied warranty of --# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the --# GNU General Public License for more details. -- --# You should have received a copy of the GNU General Public License --# along with this program. If not, see . -- --. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src --print_ver_ ls -- --f=/proc/sys --test -r $f || f=. -- --ls -l $f > out || fail=1 -- --Exit $fail -diff --git a/tests/ls/selinux-segfault.sh b/tests/ls/selinux-segfault.sh -new file mode 100755 -index 0000000..e2b7ef6 ---- /dev/null -+++ b/tests/ls/selinux-segfault.sh -@@ -0,0 +1,33 @@ -+#!/bin/sh -+# Ensure we don't segfault in selinux handling -+ -+# Copyright (C) 2008-2020 Free Software Foundation, Inc. -+ -+# This program is free software: you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation, either version 3 of the License, or -+# (at your option) any later version. -+ -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+ -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+ -+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src -+print_ver_ ls -+ -+# ls -l /proc/sys would segfault when built against libselinux1 2.0.15-2+b1 -+f=/proc/sys -+test -r $f || f=. -+ls -l $f > out || fail=1 -+ -+# ls <= 8.32 would segfault when printing -+# the security context of broken symlink targets -+mkdir sedir || framework_failure_ -+ln -sf missing sedir/broken || framework_failure_ -+returns_ 1 ls -L -R -Z -m sedir > out || fail=1 -+ -+Exit $fail --- -1.8.3.1 - diff --git a/backport-tr-fix-crash-validating-c-with-some-case-char-classe.patch b/backport-tr-fix-crash-validating-c-with-some-case-char-classe.patch deleted file mode 100644 index a89bc5e69a4d52acdfa22f33f4ba648ea0bb2361..0000000000000000000000000000000000000000 --- a/backport-tr-fix-crash-validating-c-with-some-case-char-classe.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 2bc66c5ea7e507786a45c1b6b15fe74a338240f4 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?P=C3=A1draig=20Brady?= -Date: Sat, 14 Nov 2020 16:47:05 +0000 -Subject: [PATCH] tr: fix crash validating -c with some case char classes - -This crash was identified by Cyber Independent Testing Lab: -https://cyber-itl.org/2020/10/28/citl-7000-defects.html -and was introduced with commit v8.5-163-g3f48829c2 - -* src/tr.c (validate_case_classes): Don't apply these -extra case alignment checks in the --complement case, -which is even more restrictive as to the contents of SET2. -* tests/misc/tr-case-class.sh: Add a test case, -for a large SET1, which caused the length adjustment -in validate_case_classes to underflow and trigger the assert. -* NEWS: Mention the bug fix. ---- - src/tr.c | 2 +- - tests/misc/tr-case-class.sh | 3 +++ - 2 files changed, 4 insertions(+), 1 deletion(-) - -diff --git a/src/tr.c b/src/tr.c -index 6f76507..94794a2 100644 ---- a/src/tr.c -+++ b/src/tr.c -@@ -1176,7 +1176,7 @@ validate_case_classes (struct Spec_list *s1, struct Spec_list *s2) - bool s1_new_element = true; - bool s2_new_element = true; - -- if (!s2->has_char_class) -+ if (complement || !s2->has_char_class) - return; - - for (int i = 0; i < N_CHARS; i++) -diff --git a/tests/misc/tr-case-class.sh b/tests/misc/tr-case-class.sh -index 470197e..9f442c0 100755 ---- a/tests/misc/tr-case-class.sh -+++ b/tests/misc/tr-case-class.sh -@@ -110,4 +110,7 @@ the latter string must not end with a character class' > exp - compare exp out || fail=1 - fi - -+# coreutils 8.6 - 8.32 inclusive, would abort trying to validate the following -+returns_ 1 tr -c '[:upper:]\000-\370' '[:lower:]' < /dev/null || fail=1 -+ - Exit $fail --- -1.8.3.1 - diff --git a/bugfix-selinux-flask.patch b/bugfix-selinux-flask.patch index 56918b2e34e03e8076275f4aa98659724fd40d48..83839fa7bc79a53303b174c38b703c6f09cc3f9b 100644 --- a/bugfix-selinux-flask.patch +++ b/bugfix-selinux-flask.patch @@ -11,13 +11,14 @@ diff --git a/m4/gnulib-comp.m4 b/m4/gnulib-comp.m4 index dead90e..0abf0bd 100644 --- a/m4/gnulib-comp.m4 +++ b/m4/gnulib-comp.m4 -@@ -1860,10 +1860,10 @@ AC_DEFUN([gl_INIT], +@@ -1860,11 +1860,11 @@ AC_DEFUN([gl_INIT], AC_LIBOBJ([select]) fi gl_SYS_SELECT_MODULE_INDICATOR([select]) - AC_CHECK_HEADERS([selinux/flask.h]) gl_HEADERS_SELINUX_SELINUX_H gl_HEADERS_SELINUX_CONTEXT_H + gl_HEADERS_SELINUX_LABEL_H if test "$with_selinux" != no && test "$ac_cv_header_selinux_selinux_h" = yes; then + AC_CHECK_HEADERS([selinux/flask.h]) AC_LIBOBJ([getfilecon]) diff --git a/coreutils-8.32-leaf-opt-xfs.patch b/coreutils-8.32-leaf-opt-xfs.patch deleted file mode 100644 index c02db71c148f41db7b7d9fa9e25c27146c5a3552..0000000000000000000000000000000000000000 --- a/coreutils-8.32-leaf-opt-xfs.patch +++ /dev/null @@ -1,164 +0,0 @@ -From b9f9ed14bda93ecb407129b69e6476813c250046 Mon Sep 17 00:00:00 2001 -From: Paul Eggert -Date: Wed, 15 Apr 2020 20:50:32 -0700 -Subject: [PATCH] fts: remove NOSTAT_LEAF_OPTIMIZATION -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -It caused ‘find’ and ‘du’ to dump core, and it was useful -only for obsolescent Linux filesystems anyway. Problem reported in: -https://lists.gnu.org/r/bug-gnulib/2020-04/msg00068.html -Quite possibly there is still a serious underlying fts bug with -tight-loop-check and mutating file systems, but if so this patch -should cause the bug to be triggered less often. -* lib/fts.c (enum leaf_optimization): Remove -NOSTAT_LEAF_OPTIMIZATION, as it’s problematic. -(S_MAGIC_REISERFS, S_MAGIC_XFS): Remove; no longer needed. -(leaf_optimization): Remove special cases for ReiserFS and XFS. -(fts_read): Remove NOSTAT_LEAF_OPTIMIZATION code. -* lib/fts_.h (struct _ftsent.fts_n_dirs_remaining): -Remove. All uses removed. - -Upstream-commit: 47bf2cf3184027c1eb9c1dfeea5c5b8b2d69710d -Signed-off-by: Kamil Dudka ---- - lib/fts.c | 56 ++++++++------------------------------------------- - lib/fts_.h | 5 ----- - 2 files changed, 8 insertions(+), 53 deletions(-) - -diff --git a/lib/fts.c b/lib/fts.c -index 1093ce5..dfe3fef 100644 ---- a/lib/fts.c -+++ b/lib/fts.c -@@ -445,7 +445,6 @@ fts_open (char * const *argv, - if ((parent = fts_alloc(sp, "", 0)) == NULL) - goto mem2; - parent->fts_level = FTS_ROOTPARENTLEVEL; -- parent->fts_n_dirs_remaining = -1; - } - - /* The classic fts implementation would call fts_stat with -@@ -634,9 +633,8 @@ fts_close (FTS *sp) - } - - /* Minimum link count of a traditional Unix directory. When leaf -- optimization is OK and MIN_DIR_NLINK <= st_nlink, then st_nlink is -- an upper bound on the number of subdirectories (counting "." and -- ".."). */ -+ optimization is OK and a directory's st_nlink == MIN_DIR_NLINK, -+ then the directory has no subdirectories. */ - enum { MIN_DIR_NLINK = 2 }; - - /* Whether leaf optimization is OK for a directory. */ -@@ -645,12 +643,8 @@ enum leaf_optimization - /* st_nlink is not reliable for this directory's subdirectories. */ - NO_LEAF_OPTIMIZATION, - -- /* Leaf optimization is OK, but is not useful for avoiding stat calls. */ -- OK_LEAF_OPTIMIZATION, -- -- /* Leaf optimization is not only OK: it is useful for avoiding -- stat calls, because dirent.d_type does not work. */ -- NOSTAT_LEAF_OPTIMIZATION -+ /* st_nlink == 2 means the directory lacks subdirectories. */ -+ OK_LEAF_OPTIMIZATION - }; - - #if (defined __linux__ || defined __ANDROID__) \ -@@ -663,9 +657,7 @@ enum leaf_optimization - # define S_MAGIC_CIFS 0xFF534D42 - # define S_MAGIC_NFS 0x6969 - # define S_MAGIC_PROC 0x9FA0 --# define S_MAGIC_REISERFS 0x52654973 - # define S_MAGIC_TMPFS 0x1021994 --# define S_MAGIC_XFS 0x58465342 - - # ifdef HAVE___FSWORD_T - typedef __fsword_t fsword; -@@ -786,23 +778,15 @@ dirent_inode_sort_may_be_useful (FTSENT const *p, int dir_fd) - } - - /* Given an FTS entry P for a directory with descriptor DIR_FD, -- return true if it is both useful and valid to apply leaf optimization. -- The optimization is useful only for file systems that lack usable -- dirent.d_type info. The optimization is valid if an st_nlink value -- of at least MIN_DIR_NLINK is an upper bound on the number of -- subdirectories of D, counting "." and ".." as subdirectories. -+ return whether it is valid to apply leaf optimization. -+ The optimization is valid if a directory's st_nlink value equal -+ to MIN_DIR_NLINK means the directory has no subdirectories. - DIR_FD is negative if unavailable. */ - static enum leaf_optimization - leaf_optimization (FTSENT const *p, int dir_fd) - { - switch (filesystem_type (p, dir_fd)) - { -- /* List here the file system types that may lack usable dirent.d_type -- info, yet for which the optimization does apply. */ -- case S_MAGIC_REISERFS: -- case S_MAGIC_XFS: /* XFS lacked it until 2013-08-22 commit. */ -- return NOSTAT_LEAF_OPTIMIZATION; -- - case 0: - /* Leaf optimization is unsafe if the file system type is unknown. */ - FALLTHROUGH; -@@ -1027,26 +1011,7 @@ check_for_dir: - if (p->fts_info == FTS_NSOK) - { - if (p->fts_statp->st_size == FTS_STAT_REQUIRED) -- { -- FTSENT *parent = p->fts_parent; -- if (parent->fts_n_dirs_remaining == 0 -- && ISSET(FTS_NOSTAT) -- && ISSET(FTS_PHYSICAL) -- && (leaf_optimization (parent, sp->fts_cwd_fd) -- == NOSTAT_LEAF_OPTIMIZATION)) -- { -- /* nothing more needed */ -- } -- else -- { -- p->fts_info = fts_stat(sp, p, false); -- if (S_ISDIR(p->fts_statp->st_mode) -- && p->fts_level != FTS_ROOTLEVEL -- && 0 < parent->fts_n_dirs_remaining -- && parent->fts_n_dirs_remaining != (nlink_t) -1) -- parent->fts_n_dirs_remaining--; -- } -- } -+ p->fts_info = fts_stat(sp, p, false); - else - fts_assert (p->fts_statp->st_size == FTS_NO_STAT_REQUIRED); - } -@@ -1830,11 +1795,6 @@ err: memset(sbp, 0, sizeof(struct stat)); - } - - if (S_ISDIR(sbp->st_mode)) { -- p->fts_n_dirs_remaining -- = ((sbp->st_nlink < MIN_DIR_NLINK -- || p->fts_level <= FTS_ROOTLEVEL) -- ? -1 -- : sbp->st_nlink - (ISSET (FTS_SEEDOT) ? 0 : MIN_DIR_NLINK)); - if (ISDOT(p->fts_name)) { - /* Command-line "." and ".." are real directories. */ - return (p->fts_level == FTS_ROOTLEVEL ? FTS_D : FTS_DOT); -diff --git a/lib/fts_.h b/lib/fts_.h -index d40a116..2e76cc4 100644 ---- a/lib/fts_.h -+++ b/lib/fts_.h -@@ -227,11 +227,6 @@ typedef struct _ftsent { - - size_t fts_namelen; /* strlen(fts_name) */ - -- /* If not (nlink_t) -1, an upper bound on the number of -- remaining subdirectories of interest. If this becomes -- zero, some work can be avoided. */ -- nlink_t fts_n_dirs_remaining; -- - # define FTS_D 1 /* preorder directory */ - # define FTS_DC 2 /* directory that causes cycles */ - # define FTS_DEFAULT 3 /* none of the above */ --- -2.21.1 - diff --git a/coreutils-8.32-ls-removed-dir.patch b/coreutils-8.32-ls-removed-dir.patch deleted file mode 100644 index 77dce894e0788d7416f03cb9cadb0f899f365f87..0000000000000000000000000000000000000000 --- a/coreutils-8.32-ls-removed-dir.patch +++ /dev/null @@ -1,153 +0,0 @@ -From 8c022656320592dbad146f5d3a3ae1875f419446 Mon Sep 17 00:00:00 2001 -From: Paul Eggert -Date: Thu, 5 Mar 2020 17:25:29 -0800 -Subject: [PATCH 1/2] ls: restore 8.31 behavior on removed directories - -* NEWS: Mention this. -* src/ls.c: Do not include -(print_dir): Don't worry about whether the directory is removed. -* tests/ls/removed-directory.sh: Adjust to match new (i.e., old) -behavior. - -Upstream-commit: 10fcb97bd728f09d4a027eddf8ad2900f0819b0a -Signed-off-by: Kamil Dudka ---- - src/ls.c | 22 ---------------------- - tests/ls/removed-directory.sh | 10 ++-------- - 2 files changed, 2 insertions(+), 30 deletions(-) - -diff --git a/src/ls.c b/src/ls.c -index 9d25f62..850ecc2 100644 ---- a/src/ls.c -+++ b/src/ls.c -@@ -49,10 +49,6 @@ - # include - #endif - --#ifdef __linux__ --# include --#endif -- - #include - #include - #include -@@ -2896,7 +2892,6 @@ print_dir (char const *name, char const *realname, bool command_line_arg) - struct dirent *next; - uintmax_t total_blocks = 0; - static bool first = true; -- bool found_any_entries = false; - - errno = 0; - dirp = opendir (name); -@@ -2972,7 +2967,6 @@ print_dir (char const *name, char const *realname, bool command_line_arg) - next = readdir (dirp); - if (next) - { -- found_any_entries = true; - if (! file_ignored (next->d_name)) - { - enum filetype type = unknown; -@@ -3018,22 +3012,6 @@ print_dir (char const *name, char const *realname, bool command_line_arg) - if (errno != EOVERFLOW) - break; - } --#ifdef __linux__ -- else if (! found_any_entries) -- { -- /* If readdir finds no directory entries at all, not even "." or -- "..", then double check that the directory exists. */ -- if (syscall (SYS_getdents, dirfd (dirp), NULL, 0) == -1 -- && errno != EINVAL) -- { -- /* We exclude EINVAL as that pertains to buffer handling, -- and we've passed NULL as the buffer for simplicity. -- ENOENT is returned if appropriate before buffer handling. */ -- file_failure (command_line_arg, _("reading directory %s"), name); -- } -- break; -- } --#endif - else - break; - -diff --git a/tests/ls/removed-directory.sh b/tests/ls/removed-directory.sh -index e8c835d..fe8f929 100755 ---- a/tests/ls/removed-directory.sh -+++ b/tests/ls/removed-directory.sh -@@ -26,20 +26,14 @@ case $host_triplet in - *) skip_ 'non linux kernel' ;; - esac - --LS_FAILURE=2 -- --cat <<\EOF >exp-err || framework_failure_ --ls: reading directory '.': No such file or directory --EOF -- - cwd=$(pwd) - mkdir d || framework_failure_ - cd d || framework_failure_ - rmdir ../d || framework_failure_ - --returns_ $LS_FAILURE ls >../out 2>../err || fail=1 -+ls >../out 2>../err || fail=1 - cd "$cwd" || framework_failure_ - compare /dev/null out || fail=1 --compare exp-err err || fail=1 -+compare /dev/null err || fail=1 - - Exit $fail --- -2.21.1 - - -From 847324a0debd9d12062c79e7a7a9d3d8ce76390d Mon Sep 17 00:00:00 2001 -From: Paul Eggert -Date: Sat, 7 Mar 2020 10:29:51 -0800 -Subject: [PATCH 2/2] ls: improve removed-directory test - -* tests/ls/removed-directory.sh: Remove host_triplet test. -Skip this test if one cannot remove the working directory. -From a suggestion by Bernhard Voelker (Bug#39929). - -Upstream-commit: 672819c73f2e94e61386dc0584bddf9da860cc26 -Signed-off-by: Kamil Dudka ---- - tests/ls/removed-directory.sh | 13 ++++--------- - 1 file changed, 4 insertions(+), 9 deletions(-) - -diff --git a/tests/ls/removed-directory.sh b/tests/ls/removed-directory.sh -index fe8f929..63b209d 100755 ---- a/tests/ls/removed-directory.sh -+++ b/tests/ls/removed-directory.sh -@@ -1,7 +1,7 @@ - #!/bin/sh --# If ls is asked to list a removed directory (e.g. the parent process's --# current working directory that has been removed by another process), it --# emits an error message. -+# If ls is asked to list a removed directory (e.g., the parent process's -+# current working directory has been removed by another process), it -+# should not emit an error message merely because the directory is removed. - - # Copyright (C) 2020 Free Software Foundation, Inc. - -@@ -21,15 +21,10 @@ - . "${srcdir=.}/tests/init.sh"; path_prepend_ ./src - print_ver_ ls - --case $host_triplet in -- *linux*) ;; -- *) skip_ 'non linux kernel' ;; --esac -- - cwd=$(pwd) - mkdir d || framework_failure_ - cd d || framework_failure_ --rmdir ../d || framework_failure_ -+rmdir ../d || skip_ "can't remove working directory on this platform" - - ls >../out 2>../err || fail=1 - cd "$cwd" || framework_failure_ --- -2.21.1 - diff --git a/coreutils-8.32.tar.xz b/coreutils-9.0.tar.xz similarity index 42% rename from coreutils-8.32.tar.xz rename to coreutils-9.0.tar.xz index ac6ccc8c5a82d5d6fc29535cf5487ea75353eabc..782c214d2ab0445bf7b16c933838b5f88c16d198 100644 Binary files a/coreutils-8.32.tar.xz and b/coreutils-9.0.tar.xz differ diff --git a/coreutils-df-direct.patch b/coreutils-df-direct.patch deleted file mode 100644 index 248a6aecbf454bf1447a79dcfc6cf91b915654bc..0000000000000000000000000000000000000000 --- a/coreutils-df-direct.patch +++ /dev/null @@ -1,174 +0,0 @@ -diff --git a/doc/coreutils.texi b/doc/coreutils.texi -index a507280..400e135 100644 ---- a/doc/coreutils.texi -+++ b/doc/coreutils.texi -@@ -11303,6 +11303,13 @@ some systems (notably SunOS), doing this yields more up to date results, - but in general this option makes @command{df} much slower, especially when - there are many or very busy file systems. - -+@item --direct -+@opindex --direct -+@cindex direct statfs for a file -+Do not resolve mount point and show statistics directly for a file. It can be -+especially useful for NFS mount points if there is a boundary between two -+storage policies behind the mount point. -+ - @item --total - @opindex --total - @cindex grand total of disk size, usage and available space -diff --git a/src/df.c b/src/df.c -index 8f760db..a7385fd 100644 ---- a/src/df.c -+++ b/src/df.c -@@ -120,6 +120,9 @@ static bool print_type; - /* If true, print a grand total at the end. */ - static bool print_grand_total; - -+/* If true, show statistics for a file instead of mount point. */ -+static bool direct_statfs; -+ - /* Grand total data. */ - static struct fs_usage grand_fsu; - -@@ -247,13 +250,15 @@ enum - NO_SYNC_OPTION = CHAR_MAX + 1, - SYNC_OPTION, - TOTAL_OPTION, -- OUTPUT_OPTION -+ OUTPUT_OPTION, -+ DIRECT_OPTION - }; - - static struct option const long_options[] = - { - {"all", no_argument, NULL, 'a'}, - {"block-size", required_argument, NULL, 'B'}, -+ {"direct", no_argument, NULL, DIRECT_OPTION}, - {"inodes", no_argument, NULL, 'i'}, - {"human-readable", no_argument, NULL, 'h'}, - {"si", no_argument, NULL, 'H'}, -@@ -509,7 +514,10 @@ get_header (void) - for (col = 0; col < ncolumns; col++) - { - char *cell = NULL; -- char const *header = _(columns[col]->caption); -+ char const *header = (columns[col]->field == TARGET_FIELD -+ && direct_statfs)? -+ _("File") : -+ _(columns[col]->caption); - - if (columns[col]->field == SIZE_FIELD - && (header_mode == DEFAULT_MODE -@@ -1397,6 +1405,19 @@ get_point (const char *point, const struct stat *statp) - static void - get_entry (char const *name, struct stat const *statp) - { -+ if (direct_statfs) -+ { -+ char *resolved = canonicalize_file_name (name); -+ if (resolved) -+ { -+ char *mp = find_mount_point (name, statp); -+ get_dev (NULL, mp, resolved, NULL, NULL, false, false, NULL, false); -+ free(mp); -+ free (resolved); -+ return; -+ } -+ } -+ - if ((S_ISBLK (statp->st_mode) || S_ISCHR (statp->st_mode)) - && get_disk (name)) - return; -@@ -1467,6 +1488,7 @@ or all file systems by default.\n\ - -B, --block-size=SIZE scale sizes by SIZE before printing them; e.g.,\n\ - '-BM' prints sizes in units of 1,048,576 bytes;\n\ - see SIZE format below\n\ -+ --direct show statistics for a file instead of mount point\n\ - -h, --human-readable print sizes in powers of 1024 (e.g., 1023M)\n\ - -H, --si print sizes in powers of 1000 (e.g., 1.1G)\n\ - "), stdout); -@@ -1557,6 +1579,9 @@ main (int argc, char **argv) - xstrtol_fatal (e, oi, c, long_options, optarg); - } - break; -+ case DIRECT_OPTION: -+ direct_statfs = true; -+ break; - case 'i': - if (header_mode == OUTPUT_MODE) - { -@@ -1653,6 +1678,13 @@ main (int argc, char **argv) - } - } - -+ if (direct_statfs && show_local_fs) -+ { -+ error (0, 0, _("options --direct and --local (-l) are mutually " -+ "exclusive")); -+ usage (EXIT_FAILURE); -+ } -+ - if (human_output_opts == -1) - { - if (posix_format) -diff --git a/tests/df/direct.sh b/tests/df/direct.sh -new file mode 100755 -index 0000000..8e4cfb8 ---- /dev/null -+++ b/tests/df/direct.sh -@@ -0,0 +1,55 @@ -+#!/bin/sh -+# Ensure "df --direct" works as documented -+ -+# Copyright (C) 2010 Free Software Foundation, Inc. -+ -+# This program is free software: you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation, either version 3 of the License, or -+# (at your option) any later version. -+ -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+ -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+ -+. "${srcdir=.}/init.sh"; path_prepend_ ../src -+print_ver_ df -+ -+df || skip_ "df fails" -+ -+DIR=`pwd` || framework_failure -+FILE="$DIR/file" -+touch "$FILE" || framework_failure -+echo "$FILE" > file_exp || framework_failure -+echo "Mounted on" > header_mounted_exp || framework_failure -+echo "File" > header_file_exp || framework_failure -+ -+fail=0 -+ -+df --portability "$FILE" > df_out || fail=1 -+df --portability --direct "$FILE" > df_direct_out || fail=1 -+df --portability --direct --local "$FILE" > /dev/null 2>&1 && fail=1 -+ -+# check df header -+$AWK '{ if (NR==1) print $6 " " $7; }' df_out > header_mounted_out \ -+ || framework_failure -+$AWK '{ if (NR==1) print $6; }' df_direct_out > header_file_out \ -+ || framework_failure -+compare header_mounted_out header_mounted_exp || fail=1 -+compare header_file_out header_file_exp || fail=1 -+ -+# check df output (without --direct) -+$AWK '{ if (NR==2) print $6; }' df_out > file_out \ -+ || framework_failure -+compare file_out file_exp && fail=1 -+ -+# check df output (with --direct) -+$AWK '{ if (NR==2) print $6; }' df_direct_out > file_out \ -+ || framework_failure -+compare file_out file_exp || fail=1 -+ -+Exit $fail diff --git a/coreutils.spec b/coreutils.spec index c46ee83c968705bca7062b59da0ae546a49da2bd..915a85724793809f4e9f645724b250035db85558 100644 --- a/coreutils.spec +++ b/coreutils.spec @@ -1,6 +1,6 @@ Name: coreutils -Version: 8.32 -Release: 7 +Version: 9.0 +Release: 1 License: GPLv3+ Summary: A set of basic GNU tools commonly used in shell scripts Url: https://www.gnu.org/software/coreutils/ @@ -10,23 +10,14 @@ Source0: https://ftp.gnu.org/gnu/%{name}/%{name}-%{version}.tar.xz %global __requires_exclude ^%{_bindir}/coreutils$ %global user `ls -ld $USR_SCONF|awk '{print $3}'` -Patch0: 0001-coreutils-8.31-i18n.patch -Patch1: 0001-disable-test-of-rwlock.patch +Patch0: 0001-disable-test-of-rwlock.patch # uname -p/-i to display processor type -Patch2: coreutils-8.2-uname-processortype.patch -# df --direct -Patch3: coreutils-df-direct.patch - -Patch4: coreutils-getgrouplist.patch -Patch5: bugfix-remove-usr-local-lib-from-m4.patch -Patch6: bugfix-dummy_help2man.patch -Patch7: bugfix-selinux-flask.patch -Patch8: skip-the-tests-that-require-selinux-if-selinux-is-di.patch - -Patch9: coreutils-8.32-ls-removed-dir.patch -Patch10: coreutils-8.32-leaf-opt-xfs.patch -Patch11: backport-ls-fix-crash-printing-SELinux-context-for-unstatable.patch -Patch12: backport-tr-fix-crash-validating-c-with-some-case-char-classe.patch +Patch1: coreutils-8.2-uname-processortype.patch +Patch2: coreutils-getgrouplist.patch +Patch3: bugfix-remove-usr-local-lib-from-m4.patch +Patch4: bugfix-dummy_help2man.patch +Patch5: bugfix-selinux-flask.patch +Patch6: skip-the-tests-that-require-selinux-if-selinux-is-di.patch Conflicts: filesystem < 3 # To avoid clobbering installs @@ -141,6 +132,9 @@ fi %{_mandir}/man*/* %changelog +* Sat Nov 27 2021 wangchen - 9.0-1 +- Update to 9.0 + * Tue Jul 20 2021 wangchen - 8.32-7 - Delete unnecessary gdb from BuildRequires