From 1da37c81e6e4145c66c93146f4fd4ab776e7d5b2 Mon Sep 17 00:00:00 2001 From: panxiaohe Date: Tue, 28 Jun 2022 17:37:08 +0800 Subject: [PATCH] sync bugfix patches for pcre2 --- ...-Don-t-limit-jitstack_max-to-INT_MAX.patch | 40 ++ backport-grep-fix-minor-P-memory-leak.patch | 26 + backport-grep-migrate-to-pcre2.patch | 648 ++---------------- ...p-speed-up-fix-bad-UTF8-check-with-P.patch | 51 ++ backport-grep-work-around-PCRE-bug.patch | 35 + grep.spec | 17 +- 6 files changed, 216 insertions(+), 601 deletions(-) create mode 100644 backport-grep-Don-t-limit-jitstack_max-to-INT_MAX.patch create mode 100644 backport-grep-fix-minor-P-memory-leak.patch create mode 100644 backport-grep-speed-up-fix-bad-UTF8-check-with-P.patch create mode 100644 backport-grep-work-around-PCRE-bug.patch diff --git a/backport-grep-Don-t-limit-jitstack_max-to-INT_MAX.patch b/backport-grep-Don-t-limit-jitstack_max-to-INT_MAX.patch new file mode 100644 index 0000000..4837774 --- /dev/null +++ b/backport-grep-Don-t-limit-jitstack_max-to-INT_MAX.patch @@ -0,0 +1,40 @@ +From 6f84f3be1cdd3aadacc42007582116d1c2c0a3e4 Mon Sep 17 00:00:00 2001 +From: Paul Eggert +Date: Fri, 12 Nov 2021 21:30:25 -0800 +Subject: [PATCH] =?UTF-8?q?grep:=20Don=E2=80=99t=20limit=20jitstack=5Fmax?= + =?UTF-8?q?=20to=20INT=5FMAX?= +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +* src/pcresearch.c (jit_exec): Remove arbitrary INT_MAX limit on JIT +stack size. +--- + src/pcresearch.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/src/pcresearch.c b/src/pcresearch.c +index daa0c42..bf966f8 100644 +--- a/src/pcresearch.c ++++ b/src/pcresearch.c +@@ -59,10 +59,16 @@ jit_exec (struct pcre_comp *pc, char const *subject, PCRE2_SIZE search_bytes, + { + while (true) + { ++ /* STACK_GROWTH_RATE is taken from PCRE's src/pcre2_jit_compile.c. ++ Going over the jitstack_max limit could trigger an int ++ overflow bug within PCRE. */ ++ int STACK_GROWTH_RATE = 8192; ++ size_t jitstack_max = SIZE_MAX - (STACK_GROWTH_RATE - 1); ++ + int e = pcre2_match (pc->cre, (PCRE2_SPTR)subject, search_bytes, + search_offset, options, pc->data, pc->mcontext); + if (e == PCRE2_ERROR_JIT_STACKLIMIT +- && 0 < pc->jit_stack_size && pc->jit_stack_size <= INT_MAX / 2) ++ && 0 < pc->jit_stack_size && pc->jit_stack_size <= jitstack_max / 2) + { + PCRE2_SIZE old_size = pc->jit_stack_size; + PCRE2_SIZE new_size = pc->jit_stack_size = old_size * 2; +-- +1.8.3.1 + diff --git a/backport-grep-fix-minor-P-memory-leak.patch b/backport-grep-fix-minor-P-memory-leak.patch new file mode 100644 index 0000000..2f985bc --- /dev/null +++ b/backport-grep-fix-minor-P-memory-leak.patch @@ -0,0 +1,26 @@ +From ad6e5cbcf598f55cafe83a11487ea4a6694e433b Mon Sep 17 00:00:00 2001 +From: Paul Eggert +Date: Sun, 14 Nov 2021 10:54:12 -0800 +Subject: [PATCH] grep: fix minor -P memory leak + +* src/pcresearch.c (Pcompile): Free ccontext when no longer needed. +--- + src/pcresearch.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/pcresearch.c b/src/pcresearch.c +index badcd4c..c287d99 100644 +--- a/src/pcresearch.c ++++ b/src/pcresearch.c +@@ -184,6 +184,8 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact) + die (EXIT_TROUBLE, 0, "%s", ep); + } + ++ pcre2_compile_context_free (ccontext); ++ + pc->data = pcre2_match_data_create_from_pattern (pc->cre, NULL); + + ec = pcre2_jit_compile (pc->cre, PCRE2_JIT_COMPLETE); +-- +1.8.3.1 + diff --git a/backport-grep-migrate-to-pcre2.patch b/backport-grep-migrate-to-pcre2.patch index bcc74bc..ff71022 100644 --- a/backport-grep-migrate-to-pcre2.patch +++ b/backport-grep-migrate-to-pcre2.patch @@ -1,7 +1,7 @@ From e0d39a9133e1507345d73ac5aff85f037f39aa54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Fri, 12 Nov 2021 16:45:04 -0800 -Subject: grep: migrate to pcre2 +Subject: [PATCH] grep: migrate to pcre2 Mostly a bug by bug translation of the original code to the PCRE2 API. Code still could do with some optimizations but should be good as a @@ -23,566 +23,15 @@ Performance seems equivalent, and it also seems functionally complete. Use PCRE2, not the original PCRE. * tests/filename-lineno.pl: Adjust to match PCRE2 diagnostics. --- - 0001-grep-migrate-to-pcre2.patch | 543 +++++++++++++++++++++++++++++++ - doc/grep.in.1 | 8 +- - doc/grep.texi | 2 +- - m4/pcre.m4 | 21 +- - src/pcresearch.c | 244 +++++++------- - tests/filename-lineno.pl | 4 +- - 6 files changed, 681 insertions(+), 141 deletions(-) - create mode 100644 0001-grep-migrate-to-pcre2.patch + doc/grep.in.1 | 8 +- + doc/grep.texi | 2 +- + m4/pcre.m4 | 21 ++-- + src/pcresearch.c | 249 +++++++++++++++++++++++------------------------ + tests/filename-lineno.pl | 4 +- + 5 files changed, 138 insertions(+), 146 deletions(-) -diff --git a/0001-grep-migrate-to-pcre2.patch b/0001-grep-migrate-to-pcre2.patch -new file mode 100644 -index 0000000..8375f30 ---- /dev/null -+++ b/0001-grep-migrate-to-pcre2.patch -@@ -0,0 +1,543 @@ -+From 2b4c255e67ae835c18c5ec41f3b67dadfd190213 Mon Sep 17 00:00:00 2001 -+From: licihua -+Date: Sat, 14 May 2022 18:24:47 +0800 -+Subject: [PATCH 1/1] grep: migrate to pcre2 -+ -+--- -+ doc/grep.in.1 | 8 +- -+ doc/grep.texi | 2 +- -+ m4/pcre.m4 | 21 ++-- -+ src/pcresearch.c | 244 +++++++++++++++++++-------------------- -+ tests/filename-lineno.pl | 4 +- -+ 5 files changed, 138 insertions(+), 141 deletions(-) -+ -+diff --git a/doc/grep.in.1 b/doc/grep.in.1 -+index e8854f2..0178db1 100644 -+--- a/doc/grep.in.1 -++++ b/doc/grep.in.1 -+@@ -767,7 +767,7 @@ In other implementations, basic regular expressions are less powerful. -+ The following description applies to extended regular expressions; -+ differences for basic regular expressions are summarized afterwards. -+ Perl-compatible regular expressions give additional functionality, and are -+-documented in B(3) and B(3), but work only if -++documented in B(3) and B(3), but work only if -+ PCRE support is enabled. -+ .PP -+ The fundamental building blocks are the regular expressions -+@@ -1371,9 +1371,9 @@ from the globbing syntax that the shell uses to match file names. -+ .BR sort (1), -+ .BR xargs (1), -+ .BR read (2), -+-.BR pcre (3), -+-.BR pcresyntax (3), -+-.BR pcrepattern (3), -++.BR pcre2 (3), -++.BR pcre2syntax (3), -++.BR pcre2pattern (3), -+ .BR terminfo (5), -+ .BR glob (7), -+ .BR regex (7) -+diff --git a/doc/grep.texi b/doc/grep.texi -+index 01ac81e..aae8571 100644 -+--- a/doc/grep.texi -++++ b/doc/grep.texi -+@@ -1186,7 +1186,7 @@ In other implementations, basic regular expressions are less powerful. -+ The following description applies to extended regular expressions; -+ differences for basic regular expressions are summarized afterwards. -+ Perl-compatible regular expressions give additional functionality, and -+-are documented in the @i{pcresyntax}(3) and @i{pcrepattern}(3) manual -++are documented in the @i{pcre2syntax}(3) and @i{pcre2pattern}(3) manual -+ pages, but work only if PCRE is available in the system. -+ -+ @menu -+diff --git a/m4/pcre.m4 b/m4/pcre.m4 -+index 78b7fda..0ca510f 100644 -+--- a/m4/pcre.m4 -++++ b/m4/pcre.m4 -+@@ -1,4 +1,4 @@ -+-# pcre.m4 - check for libpcre support -++# pcre.m4 - check for PCRE library support -+ -+ # Copyright (C) 2010-2021 Free Software Foundation, Inc. -+ # This file is free software; the Free Software Foundation -+@@ -9,7 +9,7 @@ AC_DEFUN([gl_FUNC_PCRE], -+ [ -+ AC_ARG_ENABLE([perl-regexp], -+ AS_HELP_STRING([--disable-perl-regexp], -+- [disable perl-regexp (pcre) support]), -++ [disable perl-regexp (pcre2) support]), -+ [case $enableval in -+ yes|no) test_pcre=$enableval;; -+ *) AC_MSG_ERROR([invalid value $enableval for --disable-perl-regexp]);; -+@@ -21,24 +21,25 @@ AC_DEFUN([gl_FUNC_PCRE], -+ use_pcre=no -+ -+ if test $test_pcre != no; then -+- PKG_CHECK_MODULES([PCRE], [libpcre], [], [: ${PCRE_LIBS=-lpcre}]) -++ PKG_CHECK_MODULES([PCRE], [libpcre2-8], [], [: ${PCRE_LIBS=-lpcre2-8}]) -+ -+- AC_CACHE_CHECK([for pcre_compile], [pcre_cv_have_pcre_compile], -++ AC_CACHE_CHECK([for pcre2_compile], [pcre_cv_have_pcre2_compile], -+ [pcre_saved_CFLAGS=$CFLAGS -+ pcre_saved_LIBS=$LIBS -+ CFLAGS="$CFLAGS $PCRE_CFLAGS" -+ LIBS="$PCRE_LIBS $LIBS" -+ AC_LINK_IFELSE( -+- [AC_LANG_PROGRAM([[#include -++ [AC_LANG_PROGRAM([[#define PCRE2_CODE_UNIT_WIDTH 8 -++ #include -+ ]], -+- [[pcre *p = pcre_compile (0, 0, 0, 0, 0); -++ [[pcre2_code *p = pcre2_compile (0, 0, 0, 0, 0, 0); -+ return !p;]])], -+- [pcre_cv_have_pcre_compile=yes], -+- [pcre_cv_have_pcre_compile=no]) -++ [pcre_cv_have_pcre2_compile=yes], -++ [pcre_cv_have_pcre2_compile=no]) -+ CFLAGS=$pcre_saved_CFLAGS -+ LIBS=$pcre_saved_LIBS]) -+ -+- if test "$pcre_cv_have_pcre_compile" = yes; then -++ if test "$pcre_cv_have_pcre2_compile" = yes; then -+ use_pcre=yes -+ elif test $test_pcre = maybe; then -+ AC_MSG_WARN([AC_PACKAGE_NAME will be built without pcre support.]) -+@@ -50,7 +51,7 @@ AC_DEFUN([gl_FUNC_PCRE], -+ if test $use_pcre = yes; then -+ AC_DEFINE([HAVE_LIBPCRE], [1], -+ [Define to 1 if you have the Perl Compatible Regular Expressions -+- library (-lpcre).]) -++ library (-lpcre2).]) -+ else -+ PCRE_CFLAGS= -+ PCRE_LIBS= -+diff --git a/src/pcresearch.c b/src/pcresearch.c -+index 37f7e40..38dc010 100644 -+--- a/src/pcresearch.c -++++ b/src/pcresearch.c -+@@ -17,40 +17,32 @@ -+ 02110-1301, USA. */ -+ -+ /* Written August 1992 by Mike Haertel. */ -++/* Updated for PCRE2 by Carlo Arenas. */ -+ -+ #include -+ #include "search.h" -+ #include "die.h" -+ -+-#include -++#define PCRE2_CODE_UNIT_WIDTH 8 -++#include -+ -+-/* This must be at least 2; everything after that is for performance -+- in pcre_exec. */ -+-enum { NSUB = 300 }; -+- -+-#ifndef PCRE_EXTRA_MATCH_LIMIT_RECURSION -+-# define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0 -+-#endif -+-#ifndef PCRE_STUDY_JIT_COMPILE -+-# define PCRE_STUDY_JIT_COMPILE 0 -+-#endif -+-#ifndef PCRE_STUDY_EXTRA_NEEDED -+-# define PCRE_STUDY_EXTRA_NEEDED 0 -++/* Needed for backward compatibility for PCRE2 < 10.30 */ -++#ifndef PCRE2_CONFIG_DEPTHLIMIT -++#define PCRE2_CONFIG_DEPTHLIMIT PCRE2_CONFIG_RECURSIONLIMIT -++#define PCRE2_ERROR_DEPTHLIMIT PCRE2_ERROR_RECURSIONLIMIT -++#define pcre2_set_depth_limit pcre2_set_recursion_limit -+ #endif -+ -+ struct pcre_comp -+ { -+- /* Compiled internal form of a Perl regular expression. */ -+- pcre *cre; -+- -+- /* Additional information about the pattern. */ -+- pcre_extra *extra; -+- -+-#if PCRE_STUDY_JIT_COMPILE -+ /* The JIT stack and its maximum size. */ -+- pcre_jit_stack *jit_stack; -+- int jit_stack_size; -+-#endif -++ pcre2_jit_stack *jit_stack; -++ PCRE2_SIZE jit_stack_size; -++ -++ /* Compiled internal form of a Perl regular expression. */ -++ pcre2_code *cre; -++ pcre2_match_context *mcontext; -++ pcre2_match_data *data; -+ -+ /* Table, indexed by ! (flag & PCRE_NOTBOL), of whether the empty -+ string matches when that flag is used. */ -+@@ -60,51 +52,50 @@ struct pcre_comp -+ -+ /* Match the already-compiled PCRE pattern against the data in SUBJECT, -+ of size SEARCH_BYTES and starting with offset SEARCH_OFFSET, with -+- options OPTIONS, and storing resulting matches into SUB. Return -+- the (nonnegative) match location or a (negative) error number. */ -++ options OPTIONS. -++ Return the (nonnegative) match count or a (negative) error number. */ -+ static int -+-jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes, -+- int search_offset, int options, int *sub) -++jit_exec (struct pcre_comp *pc, char const *subject, PCRE2_SIZE search_bytes, -++ PCRE2_SIZE search_offset, int options) -+ { -+ while (true) -+ { -+- int e = pcre_exec (pc->cre, pc->extra, subject, search_bytes, -+- search_offset, options, sub, NSUB); -+- -+-#if PCRE_STUDY_JIT_COMPILE -+- if (e == PCRE_ERROR_JIT_STACKLIMIT -++ int e = pcre2_match (pc->cre, (PCRE2_SPTR)subject, search_bytes, -++ search_offset, options, pc->data, pc->mcontext); -++ if (e == PCRE2_ERROR_JIT_STACKLIMIT -+ && 0 < pc->jit_stack_size && pc->jit_stack_size <= INT_MAX / 2) -+ { -+- int old_size = pc->jit_stack_size; -+- int new_size = pc->jit_stack_size = old_size * 2; -++ PCRE2_SIZE old_size = pc->jit_stack_size; -++ PCRE2_SIZE new_size = pc->jit_stack_size = old_size * 2; -+ if (pc->jit_stack) -+- pcre_jit_stack_free (pc->jit_stack); -+- pc->jit_stack = pcre_jit_stack_alloc (old_size, new_size); -+- if (!pc->jit_stack) -++ pcre2_jit_stack_free (pc->jit_stack); -++ pc->jit_stack = pcre2_jit_stack_create (old_size, new_size, NULL); -++ -++ if (!pc->mcontext) -++ pc->mcontext = pcre2_match_context_create (NULL); -++ -++ if (!pc->jit_stack || !pc->mcontext) -+ die (EXIT_TROUBLE, 0, -+ _("failed to allocate memory for the PCRE JIT stack")); -+- pcre_assign_jit_stack (pc->extra, NULL, pc->jit_stack); -++ pcre2_jit_stack_assign (pc->mcontext, NULL, pc->jit_stack); -+ continue; -+ } -+-#endif -+ -+-#if PCRE_EXTRA_MATCH_LIMIT_RECURSION -+- if (e == PCRE_ERROR_RECURSIONLIMIT -+- && (PCRE_STUDY_EXTRA_NEEDED || pc->extra)) -++ -++ if (e == PCRE2_ERROR_DEPTHLIMIT) -+ { -+- unsigned long lim -+- = (pc->extra->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION -+- ? pc->extra->match_limit_recursion -+- : 0); -+- if (lim <= ULONG_MAX / 2) -+- { -+- pc->extra->match_limit_recursion = lim ? 2 * lim : (1 << 24) - 1; -+- pc->extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; -+- continue; -+- } -++ uint32_t lim; -++ pcre2_config (PCRE2_CONFIG_DEPTHLIMIT, &lim); -++ if (lim >= UINT32_MAX / 2) -++ return e; -++ -++ lim <<= 1; -++ if (!pc->mcontext) -++ pc->mcontext = pcre2_match_context_create (NULL); -++ -++ pcre2_set_depth_limit (pc->mcontext, lim); -++ continue; -+ } -+-#endif -+- -+ return e; -+ } -+ } -+@@ -115,27 +106,35 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes, -+ void * -+ Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact) -+ { -+- int e; -+- char const *ep; -++ PCRE2_SIZE e; -++ int ec; -++ PCRE2_UCHAR8 ep[128]; /* 120 code units is suggested to avoid truncation */ -+ static char const wprefix[] = "(?cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ()); -++ pcre2_set_character_tables (ccontext, pcre2_maketables (NULL)); -++ pc->cre = pcre2_compile (re, n - (char *)re, flags, &ec, &e, ccontext); -+ if (!pc->cre) -+- die (EXIT_TROUBLE, 0, "%s", ep); -+- -+- int pcre_study_flags = PCRE_STUDY_EXTRA_NEEDED | PCRE_STUDY_JIT_COMPILE; -+- pc->extra = pcre_study (pc->cre, pcre_study_flags, &ep); -+- if (ep) -+- die (EXIT_TROUBLE, 0, "%s", ep); -++ { -++ pcre2_get_error_message (ec, ep, sizeof (ep)); -++ die (EXIT_TROUBLE, 0, "%s", ep); -++ } -+ -+-#if PCRE_STUDY_JIT_COMPILE -+- if (pcre_fullinfo (pc->cre, pc->extra, PCRE_INFO_JIT, &e)) -+- die (EXIT_TROUBLE, 0, _("internal error (should never happen)")); -++ pc->data = pcre2_match_data_create_from_pattern (pc->cre, NULL); -+ -+- /* The PCRE documentation says that a 32 KiB stack is the default. */ -+- if (e) -+- pc->jit_stack_size = 32 << 10; -+-#endif -++ ec = pcre2_jit_compile (pc->cre, PCRE2_JIT_COMPLETE); -++ if (ec && ec != PCRE2_ERROR_JIT_BADOPTION && ec != PCRE2_ERROR_NOMEMORY) -++ die (EXIT_TROUBLE, 0, _("JIT internal error: %d"), ec); -++ else -++ { -++ /* The PCRE documentation says that a 32 KiB stack is the default. */ -++ pc->jit_stack_size = 32 << 10; -++ } -+ -+ free (re); -+ -+- int sub[NSUB]; -+- pc->empty_match[false] = pcre_exec (pc->cre, pc->extra, "", 0, 0, -+- PCRE_NOTBOL, sub, NSUB); -+- pc->empty_match[true] = pcre_exec (pc->cre, pc->extra, "", 0, 0, 0, sub, -+- NSUB); -++ pc->empty_match[false] = jit_exec (pc, "", 0, 0, PCRE2_NOTBOL); -++ pc->empty_match[true] = jit_exec (pc, "", 0, 0, 0); -+ -+ return pc; -+ } -+@@ -206,15 +193,14 @@ size_t -+ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, -+ char const *start_ptr) -+ { -+- int sub[NSUB]; -+ char const *p = start_ptr ? start_ptr : buf; -+ bool bol = p[-1] == eolbyte; -+ char const *line_start = buf; -+- int e = PCRE_ERROR_NOMATCH; -++ int e = PCRE2_ERROR_NOMATCH; -+ char const *line_end; -+ struct pcre_comp *pc = vcp; -+- -+- /* The search address to pass to pcre_exec. This is the start of -++ PCRE2_SIZE *sub = pcre2_get_ovector_pointer (pc->data); -++ /* The search address to pass to PCRE. This is the start of -+ the buffer, or just past the most-recently discovered encoding -+ error or line end. */ -+ char const *subject = buf; -+@@ -226,14 +212,14 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, -+ better and the correctness issues were too puzzling. See -+ Bug#22655. */ -+ line_end = rawmemchr (p, eolbyte); -+- if (INT_MAX < line_end - p) -++ if (PCRE2_SIZE_MAX < line_end - p) -+ die (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit")); -+ -+ for (;;) -+ { -+ /* Skip past bytes that are easily determined to be encoding -+ errors, treating them as data that cannot match. This is -+- faster than having pcre_exec check them. */ -++ faster than having PCRE check them. */ -+ while (localeinfo.sbclen[to_uchar (*p)] == -1) -+ { -+ p++; -+@@ -241,10 +227,10 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, -+ bol = false; -+ } -+ -+- int search_offset = p - subject; -++ PCRE2_SIZE search_offset = p - subject; -+ -+ /* Check for an empty match; this is faster than letting -+- pcre_exec do it. */ -++ PCRE do it. */ -+ if (p == line_end) -+ { -+ sub[0] = sub[1] = search_offset; -+@@ -254,13 +240,14 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, -+ -+ int options = 0; -+ if (!bol) -+- options |= PCRE_NOTBOL; -++ options |= PCRE2_NOTBOL; -+ -+- e = jit_exec (pc, subject, line_end - subject, search_offset, -+- options, sub); -+- if (e != PCRE_ERROR_BADUTF8) -++ e = jit_exec (pc, subject, line_end - subject, -++ search_offset, options); -++ /* PCRE2 provides 22 different error codes for bad UTF-8 */ -++ if (! (PCRE2_ERROR_UTF8_ERR21 <= e && e < PCRE2_ERROR_UTF8_ERR1)) -+ break; -+- int valid_bytes = sub[0]; -++ PCRE2_SIZE valid_bytes = pcre2_get_startchar (pc->data); -+ -+ if (search_offset <= valid_bytes) -+ { -+@@ -270,14 +257,15 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, -+ /* Handle the empty-match case specially, for speed. -+ This optimization is valid if VALID_BYTES is zero, -+ which means SEARCH_OFFSET is also zero. */ -++ sub[0] = valid_bytes; -+ sub[1] = 0; -+ e = pc->empty_match[bol]; -+ } -+ else -+ e = jit_exec (pc, subject, valid_bytes, search_offset, -+- options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL, sub); -++ options | PCRE2_NO_UTF_CHECK | PCRE2_NOTEOL); -+ -+- if (e != PCRE_ERROR_NOMATCH) -++ if (e != PCRE2_ERROR_NOMATCH) -+ break; -+ -+ /* Treat the encoding error as data that cannot match. */ -+@@ -288,7 +276,7 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, -+ subject += valid_bytes + 1; -+ } -+ -+- if (e != PCRE_ERROR_NOMATCH) -++ if (e != PCRE2_ERROR_NOMATCH) -+ break; -+ bol = true; -+ p = subject = line_start = line_end + 1; -+@@ -299,26 +287,34 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, -+ { -+ switch (e) -+ { -+- case PCRE_ERROR_NOMATCH: -++ case PCRE2_ERROR_NOMATCH: -+ break; -+ -+- case PCRE_ERROR_NOMEMORY: -++ case PCRE2_ERROR_NOMEMORY: -+ die (EXIT_TROUBLE, 0, _("%s: memory exhausted"), input_filename ()); -+ -+-#if PCRE_STUDY_JIT_COMPILE -+- case PCRE_ERROR_JIT_STACKLIMIT: -++ case PCRE2_ERROR_JIT_STACKLIMIT: -+ die (EXIT_TROUBLE, 0, _("%s: exhausted PCRE JIT stack"), -+ input_filename ()); -+-#endif -+ -+- case PCRE_ERROR_MATCHLIMIT: -++ case PCRE2_ERROR_MATCHLIMIT: -+ die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's backtracking limit"), -+ input_filename ()); -+ -+- case PCRE_ERROR_RECURSIONLIMIT: -+- die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's recursion limit"), -++ case PCRE2_ERROR_DEPTHLIMIT: -++ die (EXIT_TROUBLE, 0, -++ _("%s: exceeded PCRE's nested backtracking limit"), -++ input_filename ()); -++ -++ case PCRE2_ERROR_RECURSELOOP: -++ die (EXIT_TROUBLE, 0, _("%s: PCRE detected recurse loop"), -+ input_filename ()); -+ -++#ifdef PCRE2_ERROR_HEAPLIMIT -++ case PCRE2_ERROR_HEAPLIMIT: -++ die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's heap limit"), -++ input_filename ()); -+++#endif -+ default: -+ /* For now, we lump all remaining PCRE failures into this basket. -+ If anyone cares to provide sample grep usage that can trigger -+diff --git a/tests/filename-lineno.pl b/tests/filename-lineno.pl -+index 1e84b45..1ff3d6a 100755 -+--- a/tests/filename-lineno.pl -++++ b/tests/filename-lineno.pl -+@@ -101,13 +101,13 @@ my @Tests = -+ ], -+ ['invalid-re-P-paren', '-P ")"', {EXIT=>2}, -+ {ERR => $ENV{PCRE_WORKS} == 1 -+- ? "$prog: unmatched parentheses\n" -++ ? "$prog: unmatched closing parenthesis\n" -+ : $no_pcre -+ }, -+ ], -+ ['invalid-re-P-star-paren', '-P "a.*)"', {EXIT=>2}, -+ {ERR => $ENV{PCRE_WORKS} == 1 -+- ? "$prog: unmatched parentheses\n" -++ ? "$prog: unmatched closing parenthesis\n" -+ : $no_pcre -+ }, -+ ], -+-- -+2.26.2 -+ diff --git a/doc/grep.in.1 b/doc/grep.in.1 -index e8854f2..0178db1 100644 +index e8854f2..21bb471 100644 --- a/doc/grep.in.1 +++ b/doc/grep.in.1 @@ -767,7 +767,7 @@ In other implementations, basic regular expressions are less powerful. @@ -590,7 +39,7 @@ index e8854f2..0178db1 100644 differences for basic regular expressions are summarized afterwards. Perl-compatible regular expressions give additional functionality, and are -documented in B(3) and B(3), but work only if -+documented in B(3) and B(3), but work only if ++documented in B(3) and B(3), but work only if PCRE support is enabled. .PP The fundamental building blocks are the regular expressions @@ -621,7 +70,7 @@ index 01ac81e..aae8571 100644 @menu diff --git a/m4/pcre.m4 b/m4/pcre.m4 -index 78b7fda..0ca510f 100644 +index 78b7fda..a1c6c82 100644 --- a/m4/pcre.m4 +++ b/m4/pcre.m4 @@ -1,4 +1,4 @@ @@ -654,8 +103,8 @@ index 78b7fda..0ca510f 100644 LIBS="$PCRE_LIBS $LIBS" AC_LINK_IFELSE( - [AC_LANG_PROGRAM([[#include -+ [AC_LANG_PROGRAM([[#define PCRE2_CODE_UNIT_WIDTH 8 -+ #include ++ [AC_LANG_PROGRAM([[#define PCRE2_CODE_UNIT_WIDTH 8 ++ #include ]], - [[pcre *p = pcre_compile (0, 0, 0, 0, 0); + [[pcre2_code *p = pcre2_compile (0, 0, 0, 0, 0, 0); @@ -682,10 +131,10 @@ index 78b7fda..0ca510f 100644 PCRE_CFLAGS= PCRE_LIBS= diff --git a/src/pcresearch.c b/src/pcresearch.c -index 37f7e40..caedf49 100644 +index 8070d06..2916d31 100644 --- a/src/pcresearch.c +++ b/src/pcresearch.c -@@ -17,40 +17,32 @@ +@@ -17,41 +17,32 @@ 02110-1301, USA. */ /* Written August 1992 by Mike Haertel. */ @@ -733,15 +182,15 @@ index 37f7e40..caedf49 100644 -#endif + pcre2_jit_stack *jit_stack; + PCRE2_SIZE jit_stack_size; -+ + + /* Compiled internal form of a Perl regular expression. */ + pcre2_code *cre; + pcre2_match_context *mcontext; + pcre2_match_data *data; - /* Table, indexed by ! (flag & PCRE_NOTBOL), of whether the empty string matches when that flag is used. */ -@@ -60,51 +52,50 @@ struct pcre_comp + int empty_match[2]; +@@ -60,54 +51,49 @@ struct pcre_comp /* Match the already-compiled PCRE pattern against the data in SUBJECT, of size SEARCH_BYTES and starting with offset SEARCH_OFFSET, with @@ -761,16 +210,21 @@ index 37f7e40..caedf49 100644 - search_offset, options, sub, NSUB); - -#if PCRE_STUDY_JIT_COMPILE +- /* Going over this would trigger an int overflow bug within PCRE. */ +- int jitstack_max = INT_MAX - 8 * 1024; +- - if (e == PCRE_ERROR_JIT_STACKLIMIT +- && 0 < pc->jit_stack_size && pc->jit_stack_size <= jitstack_max / 2) + int e = pcre2_match (pc->cre, (PCRE2_SPTR)subject, search_bytes, + search_offset, options, pc->data, pc->mcontext); + if (e == PCRE2_ERROR_JIT_STACKLIMIT - && 0 < pc->jit_stack_size && pc->jit_stack_size <= INT_MAX / 2) ++ && 0 < pc->jit_stack_size && pc->jit_stack_size <= INT_MAX / 2) { - int old_size = pc->jit_stack_size; - int new_size = pc->jit_stack_size = old_size * 2; + PCRE2_SIZE old_size = pc->jit_stack_size; + PCRE2_SIZE new_size = pc->jit_stack_size = old_size * 2; ++ if (pc->jit_stack) - pcre_jit_stack_free (pc->jit_stack); - pc->jit_stack = pcre_jit_stack_alloc (old_size, new_size); @@ -778,7 +232,7 @@ index 37f7e40..caedf49 100644 + pcre2_jit_stack_free (pc->jit_stack); + pc->jit_stack = pcre2_jit_stack_create (old_size, new_size, NULL); + -+ if (!pc->mcontext) ++ if (!pc->mcontext) + pc->mcontext = pcre2_match_context_create (NULL); + + if (!pc->jit_stack || !pc->mcontext) @@ -789,11 +243,10 @@ index 37f7e40..caedf49 100644 continue; } -#endif - +- -#if PCRE_EXTRA_MATCH_LIMIT_RECURSION - if (e == PCRE_ERROR_RECURSIONLIMIT - && (PCRE_STUDY_EXTRA_NEEDED || pc->extra)) -+ + if (e == PCRE2_ERROR_DEPTHLIMIT) { - unsigned long lim @@ -806,6 +259,8 @@ index 37f7e40..caedf49 100644 - pc->extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; - continue; - } +- } +-#endif + uint32_t lim; + pcre2_config (PCRE2_CONFIG_DEPTHLIMIT, &lim); + if (lim >= UINT32_MAX / 2) @@ -814,16 +269,14 @@ index 37f7e40..caedf49 100644 + lim <<= 1; + if (!pc->mcontext) + pc->mcontext = pcre2_match_context_create (NULL); -+ + + pcre2_set_depth_limit (pc->mcontext, lim); + continue; - } --#endif -- ++ } return e; } } -@@ -115,27 +106,35 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes, +@@ -118,27 +104,35 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes, void * Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact) { @@ -867,10 +320,11 @@ index 37f7e40..caedf49 100644 } /* FIXME: Remove this restriction. */ -@@ -149,55 +148,43 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact) +@@ -151,56 +145,42 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact) + if (match_lines) strcpy (n, xprefix); n += strlen (n); - +- - /* The PCRE interface doesn't allow NUL bytes in the pattern, so - replace each NUL byte in the pattern with the four characters - "\000", removing a preceding backslash if there are an odd @@ -888,20 +342,19 @@ index 37f7e40..caedf49 100644 - n -= (pnul - p) & 1; - strcpy (n, "\\000"); - n += 4; -+ strcpy (n, wsuffix); -+ n += strlen(wsuffix); - } +- } - memcpy (n, p, patlim - p + 1); - n += patlim - p; - *patlim = '\n'; - +- - if (match_words) -- strcpy (n, wsuffix); + strcpy (n, wsuffix); ++ n += strlen(wsuffix); ++ } if (match_lines) -- strcpy (n, xsuffix); + { -+ strcpy (n, xsuffix); -+ n += strlen(xsuffix); + strcpy (n, xsuffix); ++ n += strlen(xsuffix); + } - pc->cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ()); @@ -949,7 +402,7 @@ index 37f7e40..caedf49 100644 return pc; } -@@ -206,15 +193,14 @@ size_t +@@ -209,15 +189,15 @@ size_t Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, char const *start_ptr) { @@ -961,14 +414,14 @@ index 37f7e40..caedf49 100644 + int e = PCRE2_ERROR_NOMATCH; char const *line_end; struct pcre_comp *pc = vcp; -- -- /* The search address to pass to pcre_exec. This is the start of + PCRE2_SIZE *sub = pcre2_get_ovector_pointer (pc->data); + +- /* The search address to pass to pcre_exec. This is the start of + /* The search address to pass to PCRE. This is the start of the buffer, or just past the most-recently discovered encoding error or line end. */ char const *subject = buf; -@@ -226,14 +212,14 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, +@@ -229,14 +209,14 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, better and the correctness issues were too puzzling. See Bug#22655. */ line_end = rawmemchr (p, eolbyte); @@ -985,7 +438,7 @@ index 37f7e40..caedf49 100644 while (localeinfo.sbclen[to_uchar (*p)] == -1) { p++; -@@ -241,10 +227,10 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, +@@ -244,10 +224,10 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, bol = false; } @@ -998,7 +451,7 @@ index 37f7e40..caedf49 100644 if (p == line_end) { sub[0] = sub[1] = search_offset; -@@ -254,13 +240,14 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, +@@ -257,13 +237,14 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, int options = 0; if (!bol) @@ -1018,7 +471,7 @@ index 37f7e40..caedf49 100644 if (search_offset <= valid_bytes) { -@@ -270,14 +257,15 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, +@@ -273,14 +254,15 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, /* Handle the empty-match case specially, for speed. This optimization is valid if VALID_BYTES is zero, which means SEARCH_OFFSET is also zero. */ @@ -1036,7 +489,7 @@ index 37f7e40..caedf49 100644 break; /* Treat the encoding error as data that cannot match. */ -@@ -288,7 +276,7 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, +@@ -291,7 +273,7 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, subject += valid_bytes + 1; } @@ -1045,7 +498,7 @@ index 37f7e40..caedf49 100644 break; bol = true; p = subject = line_start = line_end + 1; -@@ -299,26 +287,34 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, +@@ -302,26 +284,35 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, { switch (e) { @@ -1085,6 +538,7 @@ index 37f7e40..caedf49 100644 + die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's heap limit"), + input_filename ()); +#endif ++ default: /* For now, we lump all remaining PCRE failures into this basket. If anyone cares to provide sample grep usage that can trigger @@ -1109,5 +563,5 @@ index 1e84b45..1ff3d6a 100755 }, ], -- -2.26.2 +1.8.3.1 diff --git a/backport-grep-speed-up-fix-bad-UTF8-check-with-P.patch b/backport-grep-speed-up-fix-bad-UTF8-check-with-P.patch new file mode 100644 index 0000000..6297cfd --- /dev/null +++ b/backport-grep-speed-up-fix-bad-UTF8-check-with-P.patch @@ -0,0 +1,51 @@ +From 6e1450408a7921771c41973761995e06445ba18b Mon Sep 17 00:00:00 2001 +From: Paul Eggert +Date: Sat, 13 Nov 2021 13:52:23 -0800 +Subject: [PATCH] grep: speed up, fix bad-UTF8 check with -P + +* src/pcresearch.c (bad_utf8_from_pcre2): New function. Fix bug +where PCRE2_ERROR_UTF8_ERR1 was not treated as an encoding error. +Improve performance when PCRE2_MATCH_INVALID_UTF is defined. +(Pexecute): Use it. +--- + src/pcresearch.c | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +diff --git a/src/pcresearch.c b/src/pcresearch.c +index 286e1dc..953aca2 100644 +--- a/src/pcresearch.c ++++ b/src/pcresearch.c +@@ -104,6 +104,18 @@ jit_exec (struct pcre_comp *pc, char const *subject, PCRE2_SIZE search_bytes, + } + } + ++/* Return true if E is an error code for bad UTF-8, and if pcre2_match ++ could return E because PCRE lacks PCRE2_MATCH_INVALID_UTF. */ ++static bool ++bad_utf8_from_pcre2 (int e) ++{ ++#ifdef PCRE2_MATCH_INVALID_UTF ++ return false; ++#else ++ return PCRE2_ERROR_UTF8_ERR21 <= e && e <= PCRE2_ERROR_UTF8_ERR1; ++#endif ++} ++ + /* Compile the -P style PATTERN, containing SIZE bytes that are + followed by '\n'. Return a description of the compiled pattern. */ + +@@ -248,9 +260,9 @@ Pexecute (void *vcp, char const *buf, idx_t size, idx_t *match_size, + + e = jit_exec (pc, subject, line_end - subject, + search_offset, options); +- /* PCRE2 provides 22 different error codes for bad UTF-8 */ +- if (! (PCRE2_ERROR_UTF8_ERR21 <= e && e < PCRE2_ERROR_UTF8_ERR1)) ++ if (!bad_utf8_from_pcre2 (e)) + break; ++ + PCRE2_SIZE valid_bytes = pcre2_get_startchar (pc->data); + + if (search_offset <= valid_bytes) +-- +1.8.3.1 + diff --git a/backport-grep-work-around-PCRE-bug.patch b/backport-grep-work-around-PCRE-bug.patch new file mode 100644 index 0000000..bb9e25a --- /dev/null +++ b/backport-grep-work-around-PCRE-bug.patch @@ -0,0 +1,35 @@ +From b3a85a1a8a816f4f6f9c01399c16efe92a86ca06 Mon Sep 17 00:00:00 2001 +From: Paul Eggert +Date: Tue, 9 Nov 2021 10:11:42 -0800 +Subject: [PATCH] grep: work around PCRE bug +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Problem reported by Carlo Marcelo Arenas Belón (Bug#51710). +* src/pcresearch.c (jit_exec): Don’t attempt to grow the JIT stack +over INT_MAX - 8 * 1024. +--- + src/pcresearch.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/src/pcresearch.c b/src/pcresearch.c +index 3bdaee9..09f92c8 100644 +--- a/src/pcresearch.c ++++ b/src/pcresearch.c +@@ -72,8 +72,11 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes, + search_offset, options, sub, NSUB); + + #if PCRE_STUDY_JIT_COMPILE ++ /* Going over this would trigger an int overflow bug within PCRE. */ ++ int jitstack_max = INT_MAX - 8 * 1024; ++ + if (e == PCRE_ERROR_JIT_STACKLIMIT +- && 0 < pc->jit_stack_size && pc->jit_stack_size <= INT_MAX / 2) ++ && 0 < pc->jit_stack_size && pc->jit_stack_size <= jitstack_max / 2) + { + int old_size = pc->jit_stack_size; + int new_size = pc->jit_stack_size = old_size * 2; +-- +1.8.3.1 + diff --git a/grep.spec b/grep.spec index 337c31a..b42ebd5 100644 --- a/grep.spec +++ b/grep.spec @@ -1,6 +1,6 @@ Name: grep Version: 3.7 -Release: 4 +Release: 5 Summary: A string search utility License: GPLv3+ URL: http://www.gnu.org/software/grep/ @@ -8,7 +8,11 @@ Source0: https://ftp.gnu.org/gnu/grep/grep-%{version}.tar.xz Patch1: backport-grep-avoid-sticky-problem-with-f-f.patch Patch2: backport-grep-s-does-not-suppress-binary-file-matches.patch -Patch3: backport-grep-migrate-to-pcre2.patch +Patch3: backport-grep-work-around-PCRE-bug.patch +Patch4: backport-grep-migrate-to-pcre2.patch +Patch5: backport-grep-Don-t-limit-jitstack_max-to-INT_MAX.patch +Patch6: backport-grep-speed-up-fix-bad-UTF8-check-with-P.patch +Patch7: backport-grep-fix-minor-P-memory-leak.patch BuildRequires: gcc pcre2-devel texinfo gettext libsigsegv-devel automake Provides: /bin/egrep /bin/fgrep /bin/grep bundled(gnulib) @@ -48,8 +52,13 @@ make check %changelog -* Sat May 14 2022 licihua -3.7-4 -- Modify the dependency from pcre to pcre2 +* Tue Jun 28 2022 panxiaohe - 3.7-5 +- grep: Don't limit jitstack_max to INT_MAX +- grep: speed up, fix bad-UTF8 check with -P +- grep: fix minor -P memory leak + +* Sat May 14 2022 licihua - 3.7-4 +- Modify the dependency from pcre to pcre2 * Fri Mar 18 2022 yangzhuangzhuang - 3.7-3 - The -s option no longer suppresses "binary file matches" messages -- Gitee