From fae330005489ed78af57b0c960d28cbfe2cc4e44 Mon Sep 17 00:00:00 2001 From: zhoupengcheng Date: Tue, 7 Nov 2023 11:24:01 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=87=E6=8D=A2=E4=BD=BF=E7=94=A8prce2=20?= =?UTF-8?q?=E5=B9=B6=E4=B8=94=E9=87=8D=E5=91=BD=E5=90=8D=20colorgrep.sh?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (cherry picked from commit e9255d1736d582890f0b9f7643124431110030c5) --- ...t-fix-regex-compilation-memory-leaks.patch | 59 ++ ...-Don-t-limit-jitstack_max-to-INT_MAX.patch | 43 ++ backport-grep-fix-minor-P-memory-leak.patch | 29 + backport-grep-migrate-to-pcre2.patch | 570 ++++++++++++++++++ ...p-speed-up-fix-bad-UTF8-check-with-P.patch | 54 ++ backport-grep-work-around-PCRE-bug.patch | 38 ++ backport-pcre-use-UCP-in-UTF-mode.patch | 11 +- colorgrep.sh => color_grep.sh | 0 grep.spec | 32 +- 9 files changed, 822 insertions(+), 14 deletions(-) create mode 100644 backport-fix-regex-compilation-memory-leaks.patch create mode 100644 backport-grep-Don-t-limit-jitstack_max-to-INT_MAX.patch create mode 100644 backport-grep-fix-minor-P-memory-leak.patch create mode 100644 backport-grep-migrate-to-pcre2.patch create mode 100644 backport-grep-speed-up-fix-bad-UTF8-check-with-P.patch create mode 100644 backport-grep-work-around-PCRE-bug.patch rename colorgrep.sh => color_grep.sh (100%) diff --git a/backport-fix-regex-compilation-memory-leaks.patch b/backport-fix-regex-compilation-memory-leaks.patch new file mode 100644 index 0000000..1f05e9f --- /dev/null +++ b/backport-fix-regex-compilation-memory-leaks.patch @@ -0,0 +1,59 @@ +From e2aec8c91e9d6ed3fc76f9f145dec8a456ce623a Mon Sep 17 00:00:00 2001 +From: Paul Eggert +Date: Fri, 24 Jun 2022 17:53:34 -0500 +Subject: grep: fix regex compilation memory leaks + +Problem reported by Jim Meyering in: +https://lists.gnu.org/r/grep-devel/2022-06/msg00012.html +* src/dfasearch.c (regex_compile): Fix memory leaks when SYNTAX_ONLY. +--- + src/dfasearch.c | 24 ++++++++++++++++-------- + 1 file changed, 16 insertions(+), 8 deletions(-) + +diff --git a/src/dfasearch.c b/src/dfasearch.c +index 2d0e861..a5e348f 100644 +--- a/src/dfasearch.c ++++ b/src/dfasearch.c +@@ -148,24 +148,32 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len, + ptrdiff_t pcount, ptrdiff_t lineno, reg_syntax_t syntax_bits, + bool syntax_only) + { +- struct re_pattern_buffer pat0; +- struct re_pattern_buffer *pat = syntax_only ? &pat0 : &dc->patterns[pcount]; +- pat->buffer = NULL; +- pat->allocated = 0; ++ struct re_pattern_buffer pat; ++ pat.buffer = NULL; ++ pat.allocated = 0; + + /* Do not use a fastmap with -i, to work around glibc Bug#20381. */ +- pat->fastmap = (syntax_only | match_icase) ? NULL : xmalloc (UCHAR_MAX + 1); ++ pat.fastmap = syntax_only | match_icase ? NULL : xmalloc (UCHAR_MAX + 1); + +- pat->translate = NULL; ++ pat.translate = NULL; + + if (syntax_only) + re_set_syntax (syntax_bits | RE_NO_SUB); + else + re_set_syntax (syntax_bits); + +- char const *err = re_compile_pattern (p, len, pat); ++ char const *err = re_compile_pattern (p, len, &pat); + if (!err) +- return true; ++ { ++ if (syntax_only) ++ regfree (&pat); ++ else ++ dc->patterns[pcount] = pat; ++ ++ return true; ++ } ++ ++ free (pat.fastmap); + + /* Emit a filename:lineno: prefix for patterns taken from files. */ + size_t pat_lineno; +-- +cgit v1.1 diff --git a/backport-grep-Don-t-limit-jitstack_max-to-INT_MAX.patch b/backport-grep-Don-t-limit-jitstack_max-to-INT_MAX.patch new file mode 100644 index 0000000..c9de3e2 --- /dev/null +++ b/backport-grep-Don-t-limit-jitstack_max-to-INT_MAX.patch @@ -0,0 +1,43 @@ +From 6f84f3be1cdd3aadacc42007582116d1c2c0a3e4 Mon Sep 17 00:00:00 2001 +From: Paul Eggert +Date: Fri, 12 Nov 2021 21:30:25 -0800 +Subject: [PATCH] =?UTF-8?q?grep:=20Don=E2=80=99t=20limit=20jitstack=5Fmax?= + =?UTF-8?q?=20to=20INT=5FMAX?= +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +* src/pcresearch.c (jit_exec): Remove arbitrary INT_MAX limit on JIT +stack size. + +Reference: https://git.savannah.gnu.org/cgit/grep.git/commit?id=6f84f3be1cdd3aadacc42007582116d1c2c0a3e4 +Conflict:context adaptation +--- + src/pcresearch.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/src/pcresearch.c b/src/pcresearch.c +index caedf49..68ac1fd 100644 +--- a/src/pcresearch.c ++++ b/src/pcresearch.c +@@ -60,10 +60,16 @@ jit_exec (struct pcre_comp *pc, char const *subject, PCRE2_SIZE search_bytes, + { + while (true) + { ++ /* STACK_GROWTH_RATE is taken from PCRE's src/pcre2_jit_compile.c. ++ Going over the jitstack_max limit could trigger an int ++ overflow bug within PCRE. */ ++ int STACK_GROWTH_RATE = 8192; ++ size_t jitstack_max = SIZE_MAX - (STACK_GROWTH_RATE - 1); ++ + int e = pcre2_match (pc->cre, (PCRE2_SPTR)subject, search_bytes, + search_offset, options, pc->data, pc->mcontext); + if (e == PCRE2_ERROR_JIT_STACKLIMIT +- && 0 < pc->jit_stack_size && pc->jit_stack_size <= INT_MAX / 2) ++ && 0 < pc->jit_stack_size && pc->jit_stack_size <= jitstack_max / 2) + { + PCRE2_SIZE old_size = pc->jit_stack_size; + PCRE2_SIZE new_size = pc->jit_stack_size = old_size * 2; +-- +2.33.0 + diff --git a/backport-grep-fix-minor-P-memory-leak.patch b/backport-grep-fix-minor-P-memory-leak.patch new file mode 100644 index 0000000..a0249e7 --- /dev/null +++ b/backport-grep-fix-minor-P-memory-leak.patch @@ -0,0 +1,29 @@ +From ad6e5cbcf598f55cafe83a11487ea4a6694e433b Mon Sep 17 00:00:00 2001 +From: Paul Eggert +Date: Sun, 14 Nov 2021 10:54:12 -0800 +Subject: [PATCH] grep: fix minor -P memory leak + +* src/pcresearch.c (Pcompile): Free ccontext when no longer needed. + +Reference: https://git.savannah.gnu.org/cgit/grep.git/commit?id=ad6e5cbcf598f55cafe83a11487ea4a6694e433b +Conflict:context adaptation +--- + src/pcresearch.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/pcresearch.c b/src/pcresearch.c +index badcd4c..c287d99 100644 +--- a/src/pcresearch.c ++++ b/src/pcresearch.c +@@ -184,6 +184,8 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact) + die (EXIT_TROUBLE, 0, "%s", ep); + } + ++ pcre2_compile_context_free (ccontext); ++ + pc->data = pcre2_match_data_create_from_pattern (pc->cre, NULL); + + ec = pcre2_jit_compile (pc->cre, PCRE2_JIT_COMPLETE); +-- +2.33.0 + diff --git a/backport-grep-migrate-to-pcre2.patch b/backport-grep-migrate-to-pcre2.patch new file mode 100644 index 0000000..1eeb219 --- /dev/null +++ b/backport-grep-migrate-to-pcre2.patch @@ -0,0 +1,570 @@ +From e0d39a9133e1507345d73ac5aff85f037f39aa54 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= +Date: Fri, 12 Nov 2021 16:45:04 -0800 +Subject: [PATCH] grep: migrate to pcre2 + +Mostly a bug by bug translation of the original code to the PCRE2 API. +Code still could do with some optimizations but should be good as a +starting point. + +The API changes the sign of some types and therefore some ugly casts +were needed, some of the changes are just to make sure all variables +fit into the newer types better. + +Includes backward compatibility and could be made to build all the way +to 10.00, but assumes a recent enough version and has been tested with +10.23 (from CentOS 7, the oldest). + +Performance seems equivalent, and it also seems functionally complete. + +* m4/pcre.m4 (gl_FUNC_PCRE): Check for PCRE2, not the original PCRE. +* src/pcresearch.c (struct pcre_comp, jit_exec) +(Pcompile, Pexecute): +Use PCRE2, not the original PCRE. +* tests/filename-lineno.pl: Adjust to match PCRE2 diagnostics. + +Reference: https://git.savannah.gnu.org/cgit/grep.git/commit?id=e0d39a9133e1507345d73ac5aff85f037f39aa54 +Conflict:context adaptation +--- + doc/grep.in.1 | 8 +- + doc/grep.texi | 2 +- + m4/pcre.m4 | 21 ++-- + src/pcresearch.c | 249 +++++++++++++++++++-------------------- + tests/filename-lineno.pl | 4 +- + 5 files changed, 138 insertions(+), 146 deletions(-) + +diff --git a/doc/grep.in.1 b/doc/grep.in.1 +index e8854f2..21bb471 100644 +--- a/doc/grep.in.1 ++++ b/doc/grep.in.1 +@@ -767,7 +767,7 @@ In other implementations, basic regular expressions are less powerful. + The following description applies to extended regular expressions; + differences for basic regular expressions are summarized afterwards. + Perl-compatible regular expressions give additional functionality, and are +-documented in B(3) and B(3), but work only if ++documented in B(3) and B(3), but work only if + PCRE support is enabled. + .PP + The fundamental building blocks are the regular expressions +@@ -1371,9 +1371,9 @@ from the globbing syntax that the shell uses to match file names. + .BR sort (1), + .BR xargs (1), + .BR read (2), +-.BR pcre (3), +-.BR pcresyntax (3), +-.BR pcrepattern (3), ++.BR pcre2 (3), ++.BR pcre2syntax (3), ++.BR pcre2pattern (3), + .BR terminfo (5), + .BR glob (7), + .BR regex (7) +diff --git a/doc/grep.texi b/doc/grep.texi +index 01ac81e..aae8571 100644 +--- a/doc/grep.texi ++++ b/doc/grep.texi +@@ -1186,7 +1186,7 @@ In other implementations, basic regular expressions are less powerful. + The following description applies to extended regular expressions; + differences for basic regular expressions are summarized afterwards. + Perl-compatible regular expressions give additional functionality, and +-are documented in the @i{pcresyntax}(3) and @i{pcrepattern}(3) manual ++are documented in the @i{pcre2syntax}(3) and @i{pcre2pattern}(3) manual + pages, but work only if PCRE is available in the system. + + @menu +diff --git a/m4/pcre.m4 b/m4/pcre.m4 +index 78b7fda..a1c6c82 100644 +--- a/m4/pcre.m4 ++++ b/m4/pcre.m4 +@@ -1,4 +1,4 @@ +-# pcre.m4 - check for libpcre support ++# pcre.m4 - check for PCRE library support + + # Copyright (C) 2010-2021 Free Software Foundation, Inc. + # This file is free software; the Free Software Foundation +@@ -9,7 +9,7 @@ AC_DEFUN([gl_FUNC_PCRE], + [ + AC_ARG_ENABLE([perl-regexp], + AS_HELP_STRING([--disable-perl-regexp], +- [disable perl-regexp (pcre) support]), ++ [disable perl-regexp (pcre2) support]), + [case $enableval in + yes|no) test_pcre=$enableval;; + *) AC_MSG_ERROR([invalid value $enableval for --disable-perl-regexp]);; +@@ -21,24 +21,25 @@ AC_DEFUN([gl_FUNC_PCRE], + use_pcre=no + + if test $test_pcre != no; then +- PKG_CHECK_MODULES([PCRE], [libpcre], [], [: ${PCRE_LIBS=-lpcre}]) ++ PKG_CHECK_MODULES([PCRE], [libpcre2-8], [], [: ${PCRE_LIBS=-lpcre2-8}]) + +- AC_CACHE_CHECK([for pcre_compile], [pcre_cv_have_pcre_compile], ++ AC_CACHE_CHECK([for pcre2_compile], [pcre_cv_have_pcre2_compile], + [pcre_saved_CFLAGS=$CFLAGS + pcre_saved_LIBS=$LIBS + CFLAGS="$CFLAGS $PCRE_CFLAGS" + LIBS="$PCRE_LIBS $LIBS" + AC_LINK_IFELSE( +- [AC_LANG_PROGRAM([[#include ++ [AC_LANG_PROGRAM([[#define PCRE2_CODE_UNIT_WIDTH 8 ++ #include + ]], +- [[pcre *p = pcre_compile (0, 0, 0, 0, 0); ++ [[pcre2_code *p = pcre2_compile (0, 0, 0, 0, 0, 0); + return !p;]])], +- [pcre_cv_have_pcre_compile=yes], +- [pcre_cv_have_pcre_compile=no]) ++ [pcre_cv_have_pcre2_compile=yes], ++ [pcre_cv_have_pcre2_compile=no]) + CFLAGS=$pcre_saved_CFLAGS + LIBS=$pcre_saved_LIBS]) + +- if test "$pcre_cv_have_pcre_compile" = yes; then ++ if test "$pcre_cv_have_pcre2_compile" = yes; then + use_pcre=yes + elif test $test_pcre = maybe; then + AC_MSG_WARN([AC_PACKAGE_NAME will be built without pcre support.]) +@@ -50,7 +51,7 @@ AC_DEFUN([gl_FUNC_PCRE], + if test $use_pcre = yes; then + AC_DEFINE([HAVE_LIBPCRE], [1], + [Define to 1 if you have the Perl Compatible Regular Expressions +- library (-lpcre).]) ++ library (-lpcre2).]) + else + PCRE_CFLAGS= + PCRE_LIBS= +diff --git a/src/pcresearch.c b/src/pcresearch.c +index 8070d06..2916d31 100644 +--- a/src/pcresearch.c ++++ b/src/pcresearch.c +@@ -17,41 +17,32 @@ + 02110-1301, USA. */ + + /* Written August 1992 by Mike Haertel. */ ++/* Updated for PCRE2 by Carlo Arenas. */ + + #include + #include "search.h" + #include "die.h" + +-#include ++#define PCRE2_CODE_UNIT_WIDTH 8 ++#include + +-/* This must be at least 2; everything after that is for performance +- in pcre_exec. */ +-enum { NSUB = 300 }; +- +-#ifndef PCRE_EXTRA_MATCH_LIMIT_RECURSION +-# define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0 +-#endif +-#ifndef PCRE_STUDY_JIT_COMPILE +-# define PCRE_STUDY_JIT_COMPILE 0 +-#endif +-#ifndef PCRE_STUDY_EXTRA_NEEDED +-# define PCRE_STUDY_EXTRA_NEEDED 0 ++/* Needed for backward compatibility for PCRE2 < 10.30 */ ++#ifndef PCRE2_CONFIG_DEPTHLIMIT ++#define PCRE2_CONFIG_DEPTHLIMIT PCRE2_CONFIG_RECURSIONLIMIT ++#define PCRE2_ERROR_DEPTHLIMIT PCRE2_ERROR_RECURSIONLIMIT ++#define pcre2_set_depth_limit pcre2_set_recursion_limit + #endif + + struct pcre_comp + { +- /* Compiled internal form of a Perl regular expression. */ +- pcre *cre; +- +- /* Additional information about the pattern. */ +- pcre_extra *extra; +- +-#if PCRE_STUDY_JIT_COMPILE + /* The JIT stack and its maximum size. */ +- pcre_jit_stack *jit_stack; +- int jit_stack_size; +-#endif ++ pcre2_jit_stack *jit_stack; ++ PCRE2_SIZE jit_stack_size; + ++ /* Compiled internal form of a Perl regular expression. */ ++ pcre2_code *cre; ++ pcre2_match_context *mcontext; ++ pcre2_match_data *data; + /* Table, indexed by ! (flag & PCRE_NOTBOL), of whether the empty + string matches when that flag is used. */ + int empty_match[2]; +@@ -60,54 +51,49 @@ struct pcre_comp + + /* Match the already-compiled PCRE pattern against the data in SUBJECT, + of size SEARCH_BYTES and starting with offset SEARCH_OFFSET, with +- options OPTIONS, and storing resulting matches into SUB. Return +- the (nonnegative) match location or a (negative) error number. */ ++ options OPTIONS. ++ Return the (nonnegative) match count or a (negative) error number. */ + static int +-jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes, +- int search_offset, int options, int *sub) ++jit_exec (struct pcre_comp *pc, char const *subject, PCRE2_SIZE search_bytes, ++ PCRE2_SIZE search_offset, int options) + { + while (true) + { +- int e = pcre_exec (pc->cre, pc->extra, subject, search_bytes, +- search_offset, options, sub, NSUB); +- +-#if PCRE_STUDY_JIT_COMPILE +- /* Going over this would trigger an int overflow bug within PCRE. */ +- int jitstack_max = INT_MAX - 8 * 1024; +- +- if (e == PCRE_ERROR_JIT_STACKLIMIT +- && 0 < pc->jit_stack_size && pc->jit_stack_size <= jitstack_max / 2) ++ int e = pcre2_match (pc->cre, (PCRE2_SPTR)subject, search_bytes, ++ search_offset, options, pc->data, pc->mcontext); ++ if (e == PCRE2_ERROR_JIT_STACKLIMIT ++ && 0 < pc->jit_stack_size && pc->jit_stack_size <= INT_MAX / 2) + { +- int old_size = pc->jit_stack_size; +- int new_size = pc->jit_stack_size = old_size * 2; ++ PCRE2_SIZE old_size = pc->jit_stack_size; ++ PCRE2_SIZE new_size = pc->jit_stack_size = old_size * 2; ++ + if (pc->jit_stack) +- pcre_jit_stack_free (pc->jit_stack); +- pc->jit_stack = pcre_jit_stack_alloc (old_size, new_size); +- if (!pc->jit_stack) ++ pcre2_jit_stack_free (pc->jit_stack); ++ pc->jit_stack = pcre2_jit_stack_create (old_size, new_size, NULL); ++ ++ if (!pc->mcontext) ++ pc->mcontext = pcre2_match_context_create (NULL); ++ ++ if (!pc->jit_stack || !pc->mcontext) + die (EXIT_TROUBLE, 0, + _("failed to allocate memory for the PCRE JIT stack")); +- pcre_assign_jit_stack (pc->extra, NULL, pc->jit_stack); ++ pcre2_jit_stack_assign (pc->mcontext, NULL, pc->jit_stack); + continue; + } +-#endif +- +-#if PCRE_EXTRA_MATCH_LIMIT_RECURSION +- if (e == PCRE_ERROR_RECURSIONLIMIT +- && (PCRE_STUDY_EXTRA_NEEDED || pc->extra)) ++ if (e == PCRE2_ERROR_DEPTHLIMIT) + { +- unsigned long lim +- = (pc->extra->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION +- ? pc->extra->match_limit_recursion +- : 0); +- if (lim <= ULONG_MAX / 2) +- { +- pc->extra->match_limit_recursion = lim ? 2 * lim : (1 << 24) - 1; +- pc->extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; +- continue; +- } +- } +-#endif ++ uint32_t lim; ++ pcre2_config (PCRE2_CONFIG_DEPTHLIMIT, &lim); ++ if (lim >= UINT32_MAX / 2) ++ return e; ++ ++ lim <<= 1; ++ if (!pc->mcontext) ++ pc->mcontext = pcre2_match_context_create (NULL); + ++ pcre2_set_depth_limit (pc->mcontext, lim); ++ continue; ++ } + return e; + } + } +@@ -118,27 +104,35 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes, + void * + Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact) + { +- int e; +- char const *ep; ++ PCRE2_SIZE e; ++ int ec; ++ PCRE2_UCHAR8 ep[128]; /* 120 code units is suggested to avoid truncation */ + static char const wprefix[] = "(?cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ()); ++ pcre2_set_character_tables (ccontext, pcre2_maketables (NULL)); ++ pc->cre = pcre2_compile (re, n - (char *)re, flags, &ec, &e, ccontext); + if (!pc->cre) +- die (EXIT_TROUBLE, 0, "%s", ep); +- +- int pcre_study_flags = PCRE_STUDY_EXTRA_NEEDED | PCRE_STUDY_JIT_COMPILE; +- pc->extra = pcre_study (pc->cre, pcre_study_flags, &ep); +- if (ep) +- die (EXIT_TROUBLE, 0, "%s", ep); ++ { ++ pcre2_get_error_message (ec, ep, sizeof (ep)); ++ die (EXIT_TROUBLE, 0, "%s", ep); ++ } + +-#if PCRE_STUDY_JIT_COMPILE +- if (pcre_fullinfo (pc->cre, pc->extra, PCRE_INFO_JIT, &e)) +- die (EXIT_TROUBLE, 0, _("internal error (should never happen)")); ++ pc->data = pcre2_match_data_create_from_pattern (pc->cre, NULL); + +- /* The PCRE documentation says that a 32 KiB stack is the default. */ +- if (e) +- pc->jit_stack_size = 32 << 10; +-#endif ++ ec = pcre2_jit_compile (pc->cre, PCRE2_JIT_COMPLETE); ++ if (ec && ec != PCRE2_ERROR_JIT_BADOPTION && ec != PCRE2_ERROR_NOMEMORY) ++ die (EXIT_TROUBLE, 0, _("JIT internal error: %d"), ec); ++ else ++ { ++ /* The PCRE documentation says that a 32 KiB stack is the default. */ ++ pc->jit_stack_size = 32 << 10; ++ } + + free (re); + +- int sub[NSUB]; +- pc->empty_match[false] = pcre_exec (pc->cre, pc->extra, "", 0, 0, +- PCRE_NOTBOL, sub, NSUB); +- pc->empty_match[true] = pcre_exec (pc->cre, pc->extra, "", 0, 0, 0, sub, +- NSUB); ++ pc->empty_match[false] = jit_exec (pc, "", 0, 0, PCRE2_NOTBOL); ++ pc->empty_match[true] = jit_exec (pc, "", 0, 0, 0); + + return pc; + } +@@ -209,15 +189,15 @@ size_t + Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, + char const *start_ptr) + { +- int sub[NSUB]; + char const *p = start_ptr ? start_ptr : buf; + bool bol = p[-1] == eolbyte; + char const *line_start = buf; +- int e = PCRE_ERROR_NOMATCH; ++ int e = PCRE2_ERROR_NOMATCH; + char const *line_end; + struct pcre_comp *pc = vcp; ++ PCRE2_SIZE *sub = pcre2_get_ovector_pointer (pc->data); + +- /* The search address to pass to pcre_exec. This is the start of ++ /* The search address to pass to PCRE. This is the start of + the buffer, or just past the most-recently discovered encoding + error or line end. */ + char const *subject = buf; +@@ -229,14 +209,14 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, + better and the correctness issues were too puzzling. See + Bug#22655. */ + line_end = rawmemchr (p, eolbyte); +- if (INT_MAX < line_end - p) ++ if (PCRE2_SIZE_MAX < line_end - p) + die (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit")); + + for (;;) + { + /* Skip past bytes that are easily determined to be encoding + errors, treating them as data that cannot match. This is +- faster than having pcre_exec check them. */ ++ faster than having PCRE check them. */ + while (localeinfo.sbclen[to_uchar (*p)] == -1) + { + p++; +@@ -244,10 +224,10 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, + bol = false; + } + +- int search_offset = p - subject; ++ PCRE2_SIZE search_offset = p - subject; + + /* Check for an empty match; this is faster than letting +- pcre_exec do it. */ ++ PCRE do it. */ + if (p == line_end) + { + sub[0] = sub[1] = search_offset; +@@ -257,13 +237,14 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, + + int options = 0; + if (!bol) +- options |= PCRE_NOTBOL; ++ options |= PCRE2_NOTBOL; + +- e = jit_exec (pc, subject, line_end - subject, search_offset, +- options, sub); +- if (e != PCRE_ERROR_BADUTF8) ++ e = jit_exec (pc, subject, line_end - subject, ++ search_offset, options); ++ /* PCRE2 provides 22 different error codes for bad UTF-8 */ ++ if (! (PCRE2_ERROR_UTF8_ERR21 <= e && e < PCRE2_ERROR_UTF8_ERR1)) + break; +- int valid_bytes = sub[0]; ++ PCRE2_SIZE valid_bytes = pcre2_get_startchar (pc->data); + + if (search_offset <= valid_bytes) + { +@@ -273,14 +254,15 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, + /* Handle the empty-match case specially, for speed. + This optimization is valid if VALID_BYTES is zero, + which means SEARCH_OFFSET is also zero. */ ++ sub[0] = valid_bytes; + sub[1] = 0; + e = pc->empty_match[bol]; + } + else + e = jit_exec (pc, subject, valid_bytes, search_offset, +- options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL, sub); ++ options | PCRE2_NO_UTF_CHECK | PCRE2_NOTEOL); + +- if (e != PCRE_ERROR_NOMATCH) ++ if (e != PCRE2_ERROR_NOMATCH) + break; + + /* Treat the encoding error as data that cannot match. */ +@@ -291,7 +273,7 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, + subject += valid_bytes + 1; + } + +- if (e != PCRE_ERROR_NOMATCH) ++ if (e != PCRE2_ERROR_NOMATCH) + break; + bol = true; + p = subject = line_start = line_end + 1; +@@ -302,26 +284,35 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, + { + switch (e) + { +- case PCRE_ERROR_NOMATCH: ++ case PCRE2_ERROR_NOMATCH: + break; + +- case PCRE_ERROR_NOMEMORY: ++ case PCRE2_ERROR_NOMEMORY: + die (EXIT_TROUBLE, 0, _("%s: memory exhausted"), input_filename ()); + +-#if PCRE_STUDY_JIT_COMPILE +- case PCRE_ERROR_JIT_STACKLIMIT: ++ case PCRE2_ERROR_JIT_STACKLIMIT: + die (EXIT_TROUBLE, 0, _("%s: exhausted PCRE JIT stack"), + input_filename ()); +-#endif + +- case PCRE_ERROR_MATCHLIMIT: ++ case PCRE2_ERROR_MATCHLIMIT: + die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's backtracking limit"), + input_filename ()); + +- case PCRE_ERROR_RECURSIONLIMIT: +- die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's recursion limit"), ++ case PCRE2_ERROR_DEPTHLIMIT: ++ die (EXIT_TROUBLE, 0, ++ _("%s: exceeded PCRE's nested backtracking limit"), + input_filename ()); + ++ case PCRE2_ERROR_RECURSELOOP: ++ die (EXIT_TROUBLE, 0, _("%s: PCRE detected recurse loop"), ++ input_filename ()); ++ ++#ifdef PCRE2_ERROR_HEAPLIMIT ++ case PCRE2_ERROR_HEAPLIMIT: ++ die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's heap limit"), ++ input_filename ()); ++#endif ++ + default: + /* For now, we lump all remaining PCRE failures into this basket. + If anyone cares to provide sample grep usage that can trigger +diff --git a/tests/filename-lineno.pl b/tests/filename-lineno.pl +index 1e84b45..1ff3d6a 100755 +--- a/tests/filename-lineno.pl ++++ b/tests/filename-lineno.pl +@@ -101,13 +101,13 @@ my @Tests = + ], + ['invalid-re-P-paren', '-P ")"', {EXIT=>2}, + {ERR => $ENV{PCRE_WORKS} == 1 +- ? "$prog: unmatched parentheses\n" ++ ? "$prog: unmatched closing parenthesis\n" + : $no_pcre + }, + ], + ['invalid-re-P-star-paren', '-P "a.*)"', {EXIT=>2}, + {ERR => $ENV{PCRE_WORKS} == 1 +- ? "$prog: unmatched parentheses\n" ++ ? "$prog: unmatched closing parenthesis\n" + : $no_pcre + }, + ], +-- +2.33.0 + diff --git a/backport-grep-speed-up-fix-bad-UTF8-check-with-P.patch b/backport-grep-speed-up-fix-bad-UTF8-check-with-P.patch new file mode 100644 index 0000000..1aec56f --- /dev/null +++ b/backport-grep-speed-up-fix-bad-UTF8-check-with-P.patch @@ -0,0 +1,54 @@ +From 6e1450408a7921771c41973761995e06445ba18b Mon Sep 17 00:00:00 2001 +From: Paul Eggert +Date: Sat, 13 Nov 2021 13:52:23 -0800 +Subject: [PATCH] grep: speed up, fix bad-UTF8 check with -P + +* src/pcresearch.c (bad_utf8_from_pcre2): New function. Fix bug +where PCRE2_ERROR_UTF8_ERR1 was not treated as an encoding error. +Improve performance when PCRE2_MATCH_INVALID_UTF is defined. +(Pexecute): Use it. + +Reference: https://git.savannah.gnu.org/cgit/grep.git/commit?id=6e1450408a7921771c41973761995e06445ba18b +Conflict:NA +--- + src/pcresearch.c | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +diff --git a/src/pcresearch.c b/src/pcresearch.c +index 286e1dc..953aca2 100644 +--- a/src/pcresearch.c ++++ b/src/pcresearch.c +@@ -104,6 +104,18 @@ jit_exec (struct pcre_comp *pc, char const *subject, PCRE2_SIZE search_bytes, + } + } + ++/* Return true if E is an error code for bad UTF-8, and if pcre2_match ++ could return E because PCRE lacks PCRE2_MATCH_INVALID_UTF. */ ++static bool ++bad_utf8_from_pcre2 (int e) ++{ ++#ifdef PCRE2_MATCH_INVALID_UTF ++ return false; ++#else ++ return PCRE2_ERROR_UTF8_ERR21 <= e && e <= PCRE2_ERROR_UTF8_ERR1; ++#endif ++} ++ + /* Compile the -P style PATTERN, containing SIZE bytes that are + followed by '\n'. Return a description of the compiled pattern. */ + +@@ -248,9 +260,9 @@ Pexecute (void *vcp, char const *buf, idx_t size, idx_t *match_size, + + e = jit_exec (pc, subject, line_end - subject, + search_offset, options); +- /* PCRE2 provides 22 different error codes for bad UTF-8 */ +- if (! (PCRE2_ERROR_UTF8_ERR21 <= e && e < PCRE2_ERROR_UTF8_ERR1)) ++ if (!bad_utf8_from_pcre2 (e)) + break; ++ + PCRE2_SIZE valid_bytes = pcre2_get_startchar (pc->data); + + if (search_offset <= valid_bytes) +-- +2.23.0 + diff --git a/backport-grep-work-around-PCRE-bug.patch b/backport-grep-work-around-PCRE-bug.patch new file mode 100644 index 0000000..6dd1e20 --- /dev/null +++ b/backport-grep-work-around-PCRE-bug.patch @@ -0,0 +1,38 @@ +From b3a85a1a8a816f4f6f9c01399c16efe92a86ca06 Mon Sep 17 00:00:00 2001 +From: Paul Eggert +Date: Tue, 9 Nov 2021 10:11:42 -0800 +Subject: [PATCH] grep: work around PCRE bug +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Problem reported by Carlo Marcelo Arenas Belón (Bug#51710). +* src/pcresearch.c (jit_exec): Don’t attempt to grow the JIT stack +over INT_MAX - 8 * 1024. + +Reference: https://git.savannah.gnu.org/cgit/grep.git/commit?id=b3a85a1a8a816f4f6f9c01399c16efe92a86ca06 +Conflict: NA +--- + src/pcresearch.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/src/pcresearch.c b/src/pcresearch.c +index 3bdaee9..09f92c8 100644 +--- a/src/pcresearch.c ++++ b/src/pcresearch.c +@@ -72,8 +72,11 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes, + search_offset, options, sub, NSUB); + + #if PCRE_STUDY_JIT_COMPILE ++ /* Going over this would trigger an int overflow bug within PCRE. */ ++ int jitstack_max = INT_MAX - 8 * 1024; ++ + if (e == PCRE_ERROR_JIT_STACKLIMIT +- && 0 < pc->jit_stack_size && pc->jit_stack_size <= INT_MAX / 2) ++ && 0 < pc->jit_stack_size && pc->jit_stack_size <= jitstack_max / 2) + { + int old_size = pc->jit_stack_size; + int new_size = pc->jit_stack_size = old_size * 2; +-- +2.27.0 + diff --git a/backport-pcre-use-UCP-in-UTF-mode.patch b/backport-pcre-use-UCP-in-UTF-mode.patch index 7eca2a9..ff1d9e9 100644 --- a/backport-pcre-use-UCP-in-UTF-mode.patch +++ b/backport-pcre-use-UCP-in-UTF-mode.patch @@ -52,11 +52,11 @@ index a107f4d..45b67ee 100644 { if (! localeinfo.using_utf8) die (EXIT_TROUBLE, 0, _("-P supports only unibyte and UTF-8 locales")); -- flags |= PCRE_UTF8; -+ flags |= (PCRE_UTF8 | PCRE_UCP); - } - - /* FIXME: Remove this restriction. */ +- flags |= PCRE2_UTF; ++ flags |= (PCRE2_UTF | PCRE2_UCP); + #if 0 + /* do not match individual code units but only UTF-8 */ + flags |= PCRE2_NEVER_BACKSLASH_C; diff --git a/tests/Makefile.am b/tests/Makefile.am index e0b0503..a47cf5c 100644 --- a/tests/Makefile.am @@ -106,4 +106,3 @@ index 0000000..4cd7db6 -- 2.33.0 - diff --git a/colorgrep.sh b/color_grep.sh similarity index 100% rename from colorgrep.sh rename to color_grep.sh diff --git a/grep.spec b/grep.spec index c4689d9..6e0b759 100644 --- a/grep.spec +++ b/grep.spec @@ -1,20 +1,26 @@ Name: grep Version: 3.7 -Release: 6 +Release: 9 Summary: A string search utility License: GPLv3+ URL: http://www.gnu.org/software/grep/ Source0: https://ftp.gnu.org/gnu/grep/grep-%{version}.tar.xz -Source1: colorgrep.sh +Source1: color_grep.sh Source2: colorgrep.csh Source3: grepconf.sh Patch1: backport-grep-avoid-sticky-problem-with-f-f.patch Patch2: backport-grep-s-does-not-suppress-binary-file-matches.patch Patch3: backport-grep-bug-backref-in-last-of-multiple-patter.patch -Patch4: backport-pcre-use-UCP-in-UTF-mode.patch - -BuildRequires: gcc pcre-devel >= 3.9-10 texinfo gettext libsigsegv-devel automake +Patch4: backport-fix-regex-compilation-memory-leaks.patch +Patch5: backport-grep-work-around-PCRE-bug.patch +Patch6: backport-grep-migrate-to-pcre2.patch +Patch7: backport-grep-Don-t-limit-jitstack_max-to-INT_MAX.patch +Patch8: backport-grep-speed-up-fix-bad-UTF8-check-with-P.patch +Patch9: backport-grep-fix-minor-P-memory-leak.patch +Patch10: backport-pcre-use-UCP-in-UTF-mode.patch + +BuildRequires: gcc pcre2-devel texinfo gettext libsigsegv-devel automake Provides: /bin/egrep /bin/fgrep /bin/grep bundled(gnulib) %description @@ -27,7 +33,7 @@ a specified pattern. By default, Grep outputs the matching lines. %build autoreconf %configure --disable-silent-rules \ -CPPFLAGS="-I%{_includedir}/pcre" CFLAGS="$RPM_OPT_FLAGS -fsigned-char" +CPPFLAGS="-I%{_includedir}/pcre2" CFLAGS="$RPM_OPT_FLAGS -fsigned-char" %make_build %install @@ -48,7 +54,8 @@ make check %files %{_datadir}/locale/* -%config(noreplace) %{_sysconfdir}/profile.d/colorgrep.*sh +%config(noreplace) %{_sysconfdir}/profile.d/color_grep.sh +%config(noreplace) %{_sysconfdir}/profile.d/colorgrep.csh %doc NEWS README THANKS TODO %license COPYING AUTHORS %{_bindir}/*grep @@ -58,9 +65,18 @@ make check %changelog -* Mon Apr 24 2023 gaoruoshu - 3.7-6 +* Tue Nov 7 2023 zhoupengcheng - 3.7-9 +- rename colorgrep.sh to color_grep.sh + +* Mon Nov 6 2023 zhoupengcheng - 3.7-8 +- Modify the dependency from pcre to pcre2 and fix bug + +* Mon Apr 24 2023 gaoruoshu - 3.7-7 - pcre: use UCP in UTF mode +* Mon Apr 3 2023 renchunhui - 3.7-6 +- backport patch from upstream + * Mon Dec 26 2022 gaoruoshu - 3.7-5 - backport patch from upstream -- Gitee