diff --git a/backport-Add-Perl-titlecasing-475.patch b/backport-Add-Perl-titlecasing-475.patch deleted file mode 100644 index 6cf03b5221118b0a883e432e846d5ed441d3d565..0000000000000000000000000000000000000000 --- a/backport-Add-Perl-titlecasing-475.patch +++ /dev/null @@ -1,69 +0,0 @@ -From f334e76dc765f23670e957413bae18c9d20b1d82 Mon Sep 17 00:00:00 2001 -From: Nicholas Wilson -Date: Mon, 16 Sep 2024 17:38:40 +0100 -Subject: [PATCH] Add Perl titlecasing (#475) - ---- - src/pcre2_substitute.c | 11 +++++++++++ - testdata/testinput2 | 3 +++ - testdata/testoutput2 | 4 ++++ - 3 files changed, 18 insertions(+) - -diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c -index 1ccef0660..83ddb8364 100644 ---- a/src/pcre2_substitute.c -+++ b/src/pcre2_substitute.c -@@ -839,6 +839,12 @@ do - forcecase = -1; - forcecasereset = 0; - ptr += 2; -+ if (ptr + 2 < repend && ptr[0] == CHAR_BACKSLASH && ptr[1] == CHAR_U) -+ { -+ /* Perl title-casing feature for \l\U (and \u\L) */ -+ forcecasereset = 1; -+ ptr += 2; -+ } - continue; - - case CHAR_U: -@@ -850,6 +856,11 @@ do - forcecase = 1; - forcecasereset = 0; - ptr += 2; -+ if (ptr + 2 < repend && ptr[0] == CHAR_BACKSLASH && ptr[1] == CHAR_L) -+ { -+ forcecasereset = -1; -+ ptr += 2; -+ } - continue; - - default: -diff --git a/testdata/testinput2 b/testdata/testinput2 -index 51e2095c8..7a836c994 100644 ---- a/testdata/testinput2 -+++ b/testdata/testinput2 -@@ -4612,6 +4612,9 @@ B)x/alt_verbnames,mark - /a(bc)(DE)/replace=a\u$1\U$1\E$1\l$2\L$2\Eab\Uab\LYZ\EDone,substitute_extended - abcDE - -+/(Hello)|wORLD/g,replace=>${1:+\l\U$0:\u\L$0}<,substitute_extended -+ Hello between wORLD -+ - /abcd/replace=xy\kz,substitute_extended - abcd - -diff --git a/testdata/testoutput2 b/testdata/testoutput2 -index eeb635d6d..7c71866b7 100644 ---- a/testdata/testoutput2 -+++ b/testdata/testoutput2 -@@ -14854,6 +14854,10 @@ No match - abcDE - 1: aBcBCbcdEdeabAByzDone - -+/(Hello)|wORLD/g,replace=>${1:+\l\U$0:\u\L$0}<,substitute_extended -+ Hello between wORLD -+ 2: >hELLO< between >World< -+ - /abcd/replace=xy\kz,substitute_extended - abcd - Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string diff --git a/backport-Additional-PCRE2_EXTRA_ASCII_xxx-code.patch b/backport-Additional-PCRE2_EXTRA_ASCII_xxx-code.patch deleted file mode 100644 index 5b83ad70d52a7af886cb9d66b5c232ee333cf803..0000000000000000000000000000000000000000 --- a/backport-Additional-PCRE2_EXTRA_ASCII_xxx-code.patch +++ /dev/null @@ -1,1489 +0,0 @@ -From a6089462a460a9f6c2db63a86e1c09fabaa81499 Mon Sep 17 00:00:00 2001 -From: Philip Hazel -Date: Wed, 1 Feb 2023 17:42:29 +0000 -Subject: [PATCH] Additional PCRE2_EXTRA_ASCII_xxx code - -Conflict:NA -Reference:https://github.com/PCRE2Project/pcre2/commit/a6089462a460a9f6c2db63a86e1c09fabaa81499 - ---- - src/pcre2.h.in | 4 + - src/pcre2_compile.c | 375 ++++++++++++++++++++++++++----------------- - src/pcre2test.c | 21 ++- - testdata/testinput5 | 133 +++++++++++++++ - testdata/testinput7 | 133 +++++++++++++++ - testdata/testoutput5 | 179 +++++++++++++++++++++ - testdata/testoutput7 | 179 +++++++++++++++++++++ - 7 files changed, 869 insertions(+), 155 deletions(-) - -diff --git a/src/pcre2.h.in b/src/pcre2.h.in -index 11419a38..7202c633 100644 ---- a/src/pcre2.h.in -+++ b/src/pcre2.h.in -@@ -154,6 +154,10 @@ D is inspected during pcre2_dfa_match() execution - #define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */ - #define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK 0x00000040u /* C */ - #define PCRE2_EXTRA_CASELESS_RESTRICT 0x00000080u /* C */ -+#define PCRE2_EXTRA_ASCII_BSD 0x00000100u /* C */ -+#define PCRE2_EXTRA_ASCII_BSS 0x00000200u /* C */ -+#define PCRE2_EXTRA_ASCII_BSW 0x00000400u /* C */ -+#define PCRE2_EXTRA_ASCII_POSIX 0x00000800u /* C */ - - /* These are for pcre2_jit_compile(). */ - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index ed2fe8a7..b8a9e098 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -123,7 +123,7 @@ static unsigned int - #endif - - static int -- compile_regex(uint32_t, uint32_t, PCRE2_UCHAR **, uint32_t **, int *, -+ compile_regex(uint32_t, uint32_t, PCRE2_UCHAR **, uint32_t **, int *, - uint32_t, uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *, - compile_block *, PCRE2_SIZE *); - -@@ -694,8 +694,8 @@ static uint32_t chartypeoffset[] = { - now all in a single string, to reduce the number of relocations when a shared - library is dynamically loaded. The list of lengths is terminated by a zero - length entry. The first three must be alpha, lower, upper, as this is assumed --for handling case independence. The indices for graph, print, and punct are --needed, so identify them. */ -+for handling case independence. The indices for several classes are needed, so -+identify them. */ - - static const char posix_names[] = - STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0 -@@ -785,7 +785,8 @@ are allowed. */ - (PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS| \ - PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES|PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL| \ - PCRE2_EXTRA_ESCAPED_CR_IS_LF|PCRE2_EXTRA_ALT_BSUX| \ -- PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) -+ PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK|PCRE2_EXTRA_ASCII_BSD| \ -+ PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX) - - /* Compile time error code numbers. They are given names so that they can more - easily be tracked. When a new number is added, the tables called eint1 and -@@ -1059,9 +1060,9 @@ for (;;) - case META_SKIP: fprintf(stderr, "META (*SKIP)"); break; - case META_THEN: fprintf(stderr, "META (*THEN)"); break; - -- case META_OPTIONS: -- fprintf(stderr, "META_OPTIONS 0x%08x 0x%08x", pptr[0], pptr[1]); -- pptr += 2; -+ case META_OPTIONS: -+ fprintf(stderr, "META_OPTIONS 0x%08x 0x%08x", pptr[0], pptr[1]); -+ pptr += 2; - break; - - case META_LOOKBEHIND: -@@ -1494,7 +1495,7 @@ Arguments: - chptr points to a returned data character - errorcodeptr points to the errorcode variable (containing zero) - options the current options bits -- xoptions the current extra options bits -+ xoptions the current extra options bits - isclass TRUE if inside a character class - cb compile data block or NULL when called from pcre2_substitute() - -@@ -2536,6 +2537,85 @@ return parsed_pattern; - - - -+/************************************************* -+* Handle \d, \D, \s, \S, \w, \W * -+*************************************************/ -+ -+/* This function is called from parse_regex() below, both for freestanding -+escapes, and those within classes, to handle those escapes that may change when -+Unicode property support is requested. Note that PCRE2_UCP will never be set -+without Unicode support because that is checked when pcre2_compile() is called. -+ -+Arguments: -+ escape the ESC_... value -+ parsed_pattern where to add the code -+ options options bits -+ xoptions extra options bits -+ -+Returns: updated value of parsed_pattern -+*/ -+static uint32_t * -+handle_escdsw(int escape, uint32_t *parsed_pattern, uint32_t options, -+ uint32_t xoptions) -+{ -+uint32_t ascii_option = 0; -+uint32_t prop = ESC_p; -+ -+switch(escape) -+ { -+ case ESC_D: -+ prop = ESC_P; -+ /* Fall through */ -+ case ESC_d: -+ ascii_option = PCRE2_EXTRA_ASCII_BSD; -+ break; -+ -+ case ESC_S: -+ prop = ESC_P; -+ /* Fall through */ -+ case ESC_s: -+ ascii_option = PCRE2_EXTRA_ASCII_BSS; -+ break; -+ -+ case ESC_W: -+ prop = ESC_P; -+ /* Fall through */ -+ case ESC_w: -+ ascii_option = PCRE2_EXTRA_ASCII_BSW; -+ break; -+ } -+ -+if ((options & PCRE2_UCP) == 0 || (xoptions & ascii_option) != 0) -+ { -+ *parsed_pattern++ = META_ESCAPE + escape; -+ } -+else -+ { -+ *parsed_pattern++ = META_ESCAPE + prop; -+ switch(escape) -+ { -+ case ESC_d: -+ case ESC_D: -+ *parsed_pattern++ = (PT_PC << 16) | ucp_Nd; -+ break; -+ -+ case ESC_s: -+ case ESC_S: -+ *parsed_pattern++ = PT_SPACE << 16; -+ break; -+ -+ case ESC_w: -+ case ESC_W: -+ *parsed_pattern++ = PT_WORD << 16; -+ break; -+ } -+ } -+ -+return parsed_pattern; -+} -+ -+ -+ - /************************************************* - * Parse regex and identify named groups * - *************************************************/ -@@ -2564,7 +2644,7 @@ typedef struct nest_save { - uint16_t max_group; - uint16_t flags; - uint32_t options; -- uint32_t xoptions; -+ uint32_t xoptions; - } nest_save; - - #define NSF_RESET 0x0001u -@@ -2579,8 +2659,11 @@ the main compiling phase. */ - #define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \ - PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \ - PCRE2_UNGREEDY) -- --#define PARSE_TRACKED_EXTRA_OPTIONS (PCRE2_EXTRA_CASELESS_RESTRICT) -+ -+#define PARSE_TRACKED_EXTRA_OPTIONS (PCRE2_EXTRA_CASELESS_RESTRICT) -+ -+#define PARSE_TRACKED_EXTRA_OPTIONS (PCRE2_EXTRA_CASELESS_RESTRICT| \ -+ PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW) - - /* States used for analyzing ranges in character classes. The two OK values - must be last. */ -@@ -3115,9 +3198,7 @@ while (ptr < ptrend) - *parsed_pattern++ = META_ESCAPE + escape; - break; - -- /* Escapes that change in UCP mode. Note that PCRE2_UCP will never be set -- without Unicode support because it is checked when pcre2_compile() is -- called. */ -+ /* Escapes that may change in UCP mode. */ - - case ESC_d: - case ESC_D: -@@ -3126,33 +3207,8 @@ while (ptr < ptrend) - case ESC_w: - case ESC_W: - okquantifier = TRUE; -- if ((options & PCRE2_UCP) == 0) -- { -- *parsed_pattern++ = META_ESCAPE + escape; -- } -- else -- { -- *parsed_pattern++ = META_ESCAPE + -- ((escape == ESC_d || escape == ESC_s || escape == ESC_w)? -- ESC_p : ESC_P); -- switch(escape) -- { -- case ESC_d: -- case ESC_D: -- *parsed_pattern++ = (PT_PC << 16) | ucp_Nd; -- break; -- -- case ESC_s: -- case ESC_S: -- *parsed_pattern++ = PT_SPACE << 16; -- break; -- -- case ESC_w: -- case ESC_W: -- *parsed_pattern++ = PT_WORD << 16; -- break; -- } -- } -+ parsed_pattern = handle_escdsw(escape, parsed_pattern, options, -+ xoptions); - break; - - /* Unicode property matching */ -@@ -3515,18 +3571,22 @@ while (ptr < ptrend) - - class_range_state = RANGE_NO; - -- /* When PCRE2_UCP is set, some of the POSIX classes are converted to -- use Unicode properties \p or \P or, in one case, \h or \H. The -- substitutes table has two values per class, containing the type and -- value of a \p or \P item. The special cases are specified with a -- negative type: a non-zero value causes \h or \H to be used, and a zero -- value falls through to behave like a non-UCP POSIX class. */ -+ /* When PCRE2_UCP is set, unless PCRE2_EXTRA_ASCII_POSIX is set, some -+ of the POSIX classes are converted to use Unicode properties \p or \P -+ or, in one case, \h or \H. The substitutes table has two values per -+ class, containing the type and value of a \p or \P item. The special -+ cases are specified with a negative type: a non-zero value causes \h or -+ \H to be used, and a zero value falls through to behave like a non-UCP -+ POSIX class. There are now also some extra options that force ASCII for -+ some classes. */ - - #ifdef SUPPORT_UNICODE -- if ((options & PCRE2_UCP) != 0) -+ if ((options & PCRE2_UCP) != 0 && -+ (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0) - { - int ptype = posix_substitutes[2*posix_class]; - int pvalue = posix_substitutes[2*posix_class + 1]; -+ - if (ptype >= 0) - { - *parsed_pattern++ = META_ESCAPE + (posix_negate? ESC_P : ESC_p); -@@ -3664,7 +3724,7 @@ while (ptr < ptrend) - *parsed_pattern++ = META_ESCAPE + escape; - break; - -- /* These escapes are converted to Unicode property tests when -+ /* These escapes may be converted to Unicode property tests when - PCRE2_UCP is set. */ - - case ESC_d: -@@ -3673,33 +3733,8 @@ while (ptr < ptrend) - case ESC_S: - case ESC_w: - case ESC_W: -- if ((options & PCRE2_UCP) == 0) -- { -- *parsed_pattern++ = META_ESCAPE + escape; -- } -- else -- { -- *parsed_pattern++ = META_ESCAPE + -- ((escape == ESC_d || escape == ESC_s || escape == ESC_w)? -- ESC_p : ESC_P); -- switch(escape) -- { -- case ESC_d: -- case ESC_D: -- *parsed_pattern++ = (PT_PC << 16) | ucp_Nd; -- break; -- -- case ESC_s: -- case ESC_S: -- *parsed_pattern++ = PT_SPACE << 16; -- break; -- -- case ESC_w: -- case ESC_W: -- *parsed_pattern++ = PT_WORD << 16; -- break; -- } -- } -+ parsed_pattern = handle_escdsw(escape, parsed_pattern, options, -+ xoptions); - break; - - /* Explicit Unicode property matching */ -@@ -4052,7 +4087,7 @@ while (ptr < ptrend) - { - BOOL hyphenok = TRUE; - uint32_t oldoptions = options; -- uint32_t oldxoptions = xoptions; -+ uint32_t oldxoptions = xoptions; - - top_nest->reset_group = 0; - top_nest->max_group = 0; -@@ -4067,7 +4102,7 @@ while (ptr < ptrend) - { - options &= ~(PCRE2_CASELESS|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| - PCRE2_DOTALL|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE); -- xoptions &= ~(PCRE2_EXTRA_CASELESS_RESTRICT); -+ xoptions &= ~(PCRE2_EXTRA_CASELESS_RESTRICT); - hyphenok = FALSE; - ptr++; - } -@@ -4085,10 +4120,44 @@ while (ptr < ptrend) - goto FAILED; - } - optset = &unset; -- xoptset = &xunset; -+ xoptset = &xunset; - hyphenok = FALSE; - break; - -+ /* There are some two-character sequences that start with 'a'. */ -+ -+ case CHAR_a: -+ if (ptr < ptrend) -+ { -+ if (*ptr == CHAR_D) -+ { -+ *xoptset |= PCRE2_EXTRA_ASCII_BSD; -+ ptr++; -+ break; -+ } -+ if (*ptr == CHAR_P) -+ { -+ *xoptset |= PCRE2_EXTRA_ASCII_POSIX; -+ ptr++; -+ break; -+ } -+ if (*ptr == CHAR_S) -+ { -+ *xoptset |= PCRE2_EXTRA_ASCII_BSS; -+ ptr++; -+ break; -+ } -+ if (*ptr == CHAR_W) -+ { -+ *xoptset |= PCRE2_EXTRA_ASCII_BSW; -+ ptr++; -+ break; -+ } -+ } -+ *xoptset |= PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS| -+ PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX; -+ break; -+ - case CHAR_J: /* Record that it changed in the external options */ - *optset |= PCRE2_DUPNAMES; - cb->external_flags |= PCRE2_JCHANGED; -@@ -4097,7 +4166,7 @@ while (ptr < ptrend) - case CHAR_i: *optset |= PCRE2_CASELESS; break; - case CHAR_m: *optset |= PCRE2_MULTILINE; break; - case CHAR_n: *optset |= PCRE2_NO_AUTO_CAPTURE; break; -- case CHAR_r: *xoptset|= PCRE2_EXTRA_CASELESS_RESTRICT; break; -+ case CHAR_r: *xoptset|= PCRE2_EXTRA_CASELESS_RESTRICT; break; - case CHAR_s: *optset |= PCRE2_DOTALL; break; - case CHAR_U: *optset |= PCRE2_UNGREEDY; break; - -@@ -4757,7 +4826,7 @@ while (ptr < ptrend) - if (top_nest != NULL && top_nest->nest_depth == nest_depth) - { - options = (options & ~PARSE_TRACKED_OPTIONS) | top_nest->options; -- xoptions = (xoptions & ~PARSE_TRACKED_EXTRA_OPTIONS) | top_nest->xoptions; -+ xoptions = (xoptions & ~PARSE_TRACKED_EXTRA_OPTIONS) | top_nest->xoptions; - if ((top_nest->flags & NSF_RESET) != 0 && - top_nest->max_group > cb->bracount) - cb->bracount = top_nest->max_group; -@@ -5019,7 +5088,7 @@ Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options bits -- xoptions the extra options bits -+ xoptions the extra options bits - cb compile data - start start of range character - end end of range character -@@ -5030,7 +5099,7 @@ Returns: the number of < 256 characters added - - static unsigned int - add_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr, -- uint32_t options, uint32_t xoptions, compile_block *cb, uint32_t start, -+ uint32_t options, uint32_t xoptions, compile_block *cb, uint32_t start, - uint32_t end) - { - uint32_t c; -@@ -5039,7 +5108,7 @@ unsigned int n8 = 0; - - /* If caseless matching is required, scan the range and process alternate - cases. In Unicode, there are 8-bit characters that have alternate cases that --are greater than 255 and vice-versa (though these may be ignored if caseless -+are greater than 255 and vice-versa (though these may be ignored if caseless - restriction is in force). Sometimes we can just extend the original range. */ - - if ((options & PCRE2_CASELESS) != 0) -@@ -5053,17 +5122,17 @@ if ((options & PCRE2_CASELESS) != 0) - options &= ~PCRE2_CASELESS; /* Remove for recursive calls */ - c = start; - -- while ((rc = get_othercase_range(&c, end, &oc, &od, -+ while ((rc = get_othercase_range(&c, end, &oc, &od, - (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)) >= 0) - { - /* Handle a single character that has more than one other case. */ - -- if (rc > 0) n8 += add_list_to_class_internal(classbits, uchardptr, -+ if (rc > 0) n8 += add_list_to_class_internal(classbits, uchardptr, - options, xoptions, cb, PRIV(ucd_caseless_sets) + rc, oc); - - /* Do nothing if the other case range is within the original range. */ - -- else if (oc >= cb->class_range_start && od <= cb->class_range_end) -+ else if (oc >= cb->class_range_start && od <= cb->class_range_end) - continue; - - /* Extend the original range if there is overlap, noting that if oc < c, -@@ -5178,7 +5247,7 @@ Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options bits -- xoptions the extra options bits -+ xoptions the extra options bits - cb contains pointers to tables etc. - p points to row of 32-bit values, terminated by NOTACHAR - except character to omit; this is used when adding lists of -@@ -5191,7 +5260,7 @@ Returns: the number of < 256 characters added - - static unsigned int - add_list_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr, -- uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p, -+ uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p, - unsigned int except) - { - unsigned int n8 = 0; -@@ -5201,7 +5270,7 @@ while (p[0] < NOTACHAR) - if (p[0] != except) - { - while(p[n+1] == p[0] + n + 1) n++; -- n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb, -+ n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb, - p[0], p[n]); - } - p += n + 1; -@@ -5223,7 +5292,7 @@ Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options bits -- xoptions the extra options bits -+ xoptions the extra options bits - cb compile data - start start of range character - end end of range character -@@ -5238,7 +5307,7 @@ add_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options, - { - cb->class_range_start = start; - cb->class_range_end = end; --return add_to_class_internal(classbits, uchardptr, options, xoptions, cb, -+return add_to_class_internal(classbits, uchardptr, options, xoptions, cb, - start, end); - } - -@@ -5257,7 +5326,7 @@ Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options bits -- xoptions the extra options bits -+ xoptions the extra options bits - cb contains pointers to tables etc. - p points to row of 32-bit values, terminated by NOTACHAR - except character to omit; this is used when adding lists of -@@ -5281,7 +5350,7 @@ while (p[0] < NOTACHAR) - while(p[n+1] == p[0] + n + 1) n++; - cb->class_range_start = p[0]; - cb->class_range_end = p[n]; -- n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb, -+ n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb, - p[0], p[n]); - } - p += n + 1; -@@ -5302,7 +5371,7 @@ Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options bits -- xoptions the extra options bits -+ xoptions the extra options bits - cb contains pointers to tables etc. - p points to row of 32-bit values, terminated by NOTACHAR - -@@ -5412,7 +5481,7 @@ real compile phase. The value of lengthptr distinguishes the two phases. - - Arguments: - optionsptr pointer to the option bits -- xoptionsptr pointer to the extra option bits -+ xoptionsptr pointer to the extra option bits - codeptr points to the pointer to the current code point - pptrptr points to the current parsed pattern pointer - errorcodeptr points to error code variable -@@ -5431,10 +5500,10 @@ Returns: 0 There's been an error, *errorcodeptr is non-zero - */ - - static int --compile_branch(uint32_t *optionsptr, uint32_t *xoptionsptr, -- PCRE2_UCHAR **codeptr, uint32_t **pptrptr, int *errorcodeptr, -- uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr, -- uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb, -+compile_branch(uint32_t *optionsptr, uint32_t *xoptionsptr, -+ PCRE2_UCHAR **codeptr, uint32_t **pptrptr, int *errorcodeptr, -+ uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr, -+ uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb, - PCRE2_SIZE *lengthptr) - { - int bravalue = 0; -@@ -5757,8 +5826,8 @@ for (;; pptr++) - uint32_t c = pptr[1]; - - #ifdef SUPPORT_UNICODE -- if (UCD_CASESET(c) == 0 || -- ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0 && -+ if (UCD_CASESET(c) == 0 || -+ ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0 && - c < 128 && pptr[2] < 128)) - #endif - { -@@ -5851,41 +5920,45 @@ for (;; pptr++) - XCL_PROP/XCL_NOTPROP directly, which is done here. */ - - #ifdef SUPPORT_UNICODE -- if ((options & PCRE2_UCP) != 0) switch(posix_class) -+ if ((options & PCRE2_UCP) != 0 && -+ (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0) - { -- case PC_GRAPH: -- case PC_PRINT: -- case PC_PUNCT: -- *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP; -- *class_uchardata++ = (PCRE2_UCHAR) -- ((posix_class == PC_GRAPH)? PT_PXGRAPH : -- (posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT); -- *class_uchardata++ = 0; -- xclass_has_prop = TRUE; -- goto CONTINUE_CLASS; -- -- /* For the other POSIX classes (ascii, xdigit) we are going to -- fall through to the non-UCP case and build a bit map for -- characters with code points less than 256. However, if we are in -- a negated POSIX class, characters with code points greater than -- 255 must either all match or all not match, depending on whether -- the whole class is not or is negated. For example, for -- [[:^ascii:]... they must all match, whereas for [^[:^xdigit:]... -- they must not. -- -- In the special case where there are no xclass items, this is -- automatically handled by the use of OP_CLASS or OP_NCLASS, but an -- explicit range is needed for OP_XCLASS. Setting a flag here -- causes the range to be generated later when it is known that -- OP_XCLASS is required. In the 8-bit library this is relevant only in -- utf mode, since no wide characters can exist otherwise. */ -+ switch(posix_class) -+ { -+ case PC_GRAPH: -+ case PC_PRINT: -+ case PC_PUNCT: -+ *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP; -+ *class_uchardata++ = (PCRE2_UCHAR) -+ ((posix_class == PC_GRAPH)? PT_PXGRAPH : -+ (posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT); -+ *class_uchardata++ = 0; -+ xclass_has_prop = TRUE; -+ goto CONTINUE_CLASS; -+ -+ /* For the other POSIX classes (ascii, xdigit) we are going to -+ fall through to the non-UCP case and build a bit map for -+ characters with code points less than 256. However, if we are in -+ a negated POSIX class, characters with code points greater than -+ 255 must either all match or all not match, depending on whether -+ the whole class is not or is negated. For example, for -+ [[:^ascii:]... they must all match, whereas for [^[:^xdigit:]... -+ they must not. -+ -+ In the special case where there are no xclass items, this is -+ automatically handled by the use of OP_CLASS or OP_NCLASS, but an -+ explicit range is needed for OP_XCLASS. Setting a flag here -+ causes the range to be generated later when it is known that -+ OP_XCLASS is required. In the 8-bit library this is relevant only in -+ utf mode, since no wide characters can exist otherwise. */ - -- default: -+ default: - #if PCRE2_CODE_UNIT_WIDTH == 8 -- if (utf) -+ if (utf) - #endif -- match_all_or_no_wide_chars |= local_negate; -- break; -+ match_all_or_no_wide_chars |= local_negate; -+ break; -+ } - } - #endif /* SUPPORT_UNICODE */ - -@@ -6011,7 +6084,7 @@ for (;; pptr++) - - case ESC_h: - (void)add_list_to_class(classbits, &class_uchardata, -- options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list), -+ options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list), - NOTACHAR); - break; - -@@ -6022,7 +6095,7 @@ for (;; pptr++) - - case ESC_v: - (void)add_list_to_class(classbits, &class_uchardata, -- options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list), -+ options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list), - NOTACHAR); - break; - -@@ -6102,7 +6175,7 @@ for (;; pptr++) - if (C <= CHAR_i) - { - class_has_8bitchar += -- add_to_class(classbits, &class_uchardata, options, xoptions, -+ add_to_class(classbits, &class_uchardata, options, xoptions, - cb, C + uc, ((D < CHAR_i)? D : CHAR_i) + uc); - C = CHAR_j; - } -@@ -6110,7 +6183,7 @@ for (;; pptr++) - if (C <= D && C <= CHAR_r) - { - class_has_8bitchar += -- add_to_class(classbits, &class_uchardata, options, xoptions, -+ add_to_class(classbits, &class_uchardata, options, xoptions, - cb, C + uc, ((D < CHAR_r)? D : CHAR_r) + uc); - C = CHAR_s; - } -@@ -6118,7 +6191,7 @@ for (;; pptr++) - if (C <= D) - { - class_has_8bitchar += -- add_to_class(classbits, &class_uchardata, options, xoptions, -+ add_to_class(classbits, &class_uchardata, options, xoptions, - cb, C + uc, D + uc); - } - } -@@ -6126,7 +6199,7 @@ for (;; pptr++) - #endif - /* Not an EBCDIC special range */ - -- class_has_8bitchar += add_to_class(classbits, &class_uchardata, -+ class_has_8bitchar += add_to_class(classbits, &class_uchardata, - options, xoptions, cb, c, d); - goto CONTINUE_CLASS; /* Go get the next char in the class */ - } /* End of range handling */ -@@ -6135,7 +6208,7 @@ for (;; pptr++) - /* Handle a single character. */ - - class_has_8bitchar += -- add_to_class(classbits, &class_uchardata, options, xoptions, cb, -+ add_to_class(classbits, &class_uchardata, options, xoptions, cb, - meta, meta); - } - -@@ -6621,7 +6694,7 @@ for (;; pptr++) - if ((group_return = - compile_regex( - options, /* The options state */ -- xoptions, /* The extra options state */ -+ xoptions, /* The extra options state */ - &tempcode, /* Where to put code (updated) */ - &pptr, /* Input pointer (updated) */ - errorcodeptr, /* Where to put an error message */ -@@ -8020,7 +8093,7 @@ for (;; pptr++) - { - uint32_t caseset = UCD_CASESET(meta); - if (caseset != 0 && -- ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) == 0 || -+ ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) == 0 || - PRIV(ucd_caseless_sets)[caseset] > 127)) - { - *code++ = OP_PROP; -@@ -8137,7 +8210,7 @@ the two phases. - - Arguments: - options option bits, including any changes for this subpattern -- xoptions extra option bits, ditto -+ xoptions extra option bits, ditto - codeptr -> the address of the current code pointer - pptrptr -> the address of the current parsed pattern pointer - errorcodeptr -> pointer to error code variable -@@ -8157,10 +8230,10 @@ Returns: 0 There has been an error - */ - - static int --compile_regex(uint32_t options, uint32_t xoptions, PCRE2_UCHAR **codeptr, -- uint32_t **pptrptr, int *errorcodeptr, uint32_t skipunits, -- uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr, -- uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb, -+compile_regex(uint32_t options, uint32_t xoptions, PCRE2_UCHAR **codeptr, -+ uint32_t **pptrptr, int *errorcodeptr, uint32_t skipunits, -+ uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr, -+ uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb, - PCRE2_SIZE *lengthptr) - { - PCRE2_UCHAR *code = *codeptr; -@@ -8257,7 +8330,7 @@ for (;;) - into the length. */ - - if ((branch_return = -- compile_branch(&options, &xoptions, &code, &pptr, errorcodeptr, -+ compile_branch(&options, &xoptions, &code, &pptr, errorcodeptr, - &branchfirstcu, &branchfirstcuflags, &branchreqcu, &branchreqcuflags, - &bc, cb, (lengthptr == NULL)? NULL : &length)) == 0) - return 0; -@@ -10292,7 +10365,7 @@ code = cworkspace; - *code = OP_BRA; - - (void)compile_regex(cb.external_options, ccontext->extra_options, &code, &pptr, -- &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, &cb, -+ &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, &cb, - &length); - - if (errorcode != 0) goto HAD_CB_ERROR; /* Offset is in cb.erroroffset */ -@@ -10390,8 +10463,8 @@ of the function here. */ - pptr = cb.parsed_pattern; - code = (PCRE2_UCHAR *)codestart; - *code = OP_BRA; --regexrc = compile_regex(re->overall_options, ccontext->extra_options, &code, -- &pptr, &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, -+regexrc = compile_regex(re->overall_options, ccontext->extra_options, &code, -+ &pptr, &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, - &cb, NULL); - if (regexrc < 0) re->flags |= PCRE2_MATCH_EMPTY; - re->top_bracket = cb.bracount; -diff --git a/src/pcre2test.c b/src/pcre2test.c -index 169c6181..6bae5bb5 100644 ---- a/src/pcre2test.c -+++ b/src/pcre2test.c -@@ -628,6 +628,9 @@ typedef struct modstruct { - PCRE2_SIZE offset; - } modstruct; - -+#define PCRE2_EXTRA_ASCII_ALL (PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS| \ -+ PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX) -+ - static modstruct modlist[] = { - { "aftertext", MOD_PNDP, MOD_CTL, CTL_AFTERTEXT, PO(control) }, - { "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) }, -@@ -642,6 +645,11 @@ static modstruct modlist[] = { - { "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) }, - { "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) }, - { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) }, -+ { "ascii_all", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_ALL, CO(extra_options) }, -+ { "ascii_bsd", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSD, CO(extra_options) }, -+ { "ascii_bss", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSS, CO(extra_options) }, -+ { "ascii_bsw", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSW, CO(extra_options) }, -+ { "ascii_posix", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_POSIX, CO(extra_options) }, - { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) }, - { "bad_escape_is_literal", MOD_CTC, MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) }, - { "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) }, -@@ -839,6 +847,7 @@ typedef struct c1modstruct { - static c1modstruct c1modlist[] = { - { "bincode", 'B', -1 }, - { "info", 'I', -1 }, -+ { "ascii_all", 'a', -1 }, - { "global", 'g', -1 }, - { "caseless", 'i', -1 }, - { "multiline", 'm', -1 }, -@@ -4283,15 +4292,19 @@ show_compile_extra_options(uint32_t options, const char *before, - const char *after) - { - if (options == 0) fprintf(outfile, "%s %s", before, after); --else fprintf(outfile, "%s%s%s%s%s%s%s%s%s", -+else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s", - before, - ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "", -+ ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " alt_bsux" : "", -+ ((options & PCRE2_EXTRA_ASCII_BSD) != 0)? " ascii_bsd" : "", -+ ((options & PCRE2_EXTRA_ASCII_BSS) != 0)? " ascii_bss" : "", -+ ((options & PCRE2_EXTRA_ASCII_BSW) != 0)? " ascii_bsw" : "", -+ ((options & PCRE2_EXTRA_ASCII_POSIX) != 0)? " ascii_posix" : "", - ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "", -- ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " extra_alt_bsux" : "", -+ ((options & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)? " caseless_restrict" : "", -+ ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "", - ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "", - ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "", -- ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "", -- ((options & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)? " caseless_restrict" : "", - after); - } - -diff --git a/testdata/testinput5 b/testdata/testinput5 -index b8174230..6e186cf0 100644 ---- a/testdata/testinput5 -+++ b/testdata/testinput5 -@@ -2309,4 +2309,137 @@ - - # End caseless restrict tests - -+# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without. -+ -+# DIGITS -+ -+/\d+/i,utf -+ 123\x{660}456 -+ -+/\d+/i,utf,ucp -+ 123\x{660}456 -+ -+/\d+/i,utf,ucp,ascii_bsd -+ 123\x{660}456 -+ -+/[\d]+/i,utf -+ 123\x{660}456 -+ -+/[\d]+/i,utf,ucp -+ 123\x{660}456 -+ -+/[\d]+/i,utf,ucp,ascii_bsd -+ 123\x{660}456 -+ -+/\d(?aD)\d(?-aD)\d/utf,ucp -+ \x{660}9\x{660} -+\= Expect no match -+ \x{660}\x{660}\x{660} -+ -+/\d(?a)\d(?-a)\d/utf,ucp -+ \x{660}9\x{660} -+\= Expect no match -+ \x{660}\x{660}\x{660} -+ -+# SPACES -+ -+/>\s+ < -+\= Expect no match -+ >\x{a0} < -+ -+/>\s+ < -+ >\x{a0} < -+ -+/>\s+ < -+\= Expect no match -+ >\x{a0} < -+ -+/>[\s]+ < -+\= Expect no match -+ >\x{a0} < -+ -+/>[\s]+ < -+ >\x{a0} < -+ -+/>[\s]+ < -+\= Expect no match -+ >\x{a0} < -+ -+/>\s(?aS)\s(?-aS)\s\x{a0} \x{a0}< -+\= Expect no match -+ >\x{a0}\x{a0}\x{a0}< -+ -+/>\s(?a)\s(?-a)\s\x{a0} \x{a0}< -+\= Expect no match -+ >\x{a0}\x{a0}\x{a0}< -+ -+# WORDS -+ -+/\w+/i,utf -+ 123\x{660}abc -+ -+/\w+/i,utf,ucp -+ 123\x{660}abc -+ -+/\w+/i,utf,ucp,ascii_bsw -+ 123\x{660}abc -+ -+/[\w]+/i,utf -+ 123\x{660}abc -+ -+/[\w]+/i,utf,ucp -+ 123\x{660}abc -+ -+/[\w]+/i,utf,ucp,ascii_bsw -+ 123\x{660}abc -+ -+/\w(?aW)\w(?-aW)\w/utf,ucp -+ \x{660}A\x{c0} -+\= Expect no match -+ \x{660}\x{c0}\x{c0} -+ -+/\w(?a)\w(?-a)\w/utf,ucp -+ \x{660}A\x{c0} -+\= Expect no match -+ \x{660}\x{c0}\x{c0} -+ -+# POSIX -+ -+/[[:digit:]]+/utf,ucp -+ 123\x{660}456 -+ -+/[[:digit:]]+/utf,ucp,ascii_posix -+ 123\x{660}456 -+ -+/>[[:space:]]+\x{a0} \x{a0}< -+ >\x{a0}\x{a0}\x{a0}< -+ -+/>[[:space:]]+\x{a0} \x{a0}< -+ -+/(?aP)[[:alnum:]]+/i,ucp,utf -+ abcáxyz -+ abc\x{660}xyz -+ -+/(?aP)[[:alnum:]\d]+/i,ucp,utf -+ abc\x{660}xyz -+ -+# VARIOUS -+ -+/[\d\s\w]+/a,ucp,utf -+ 9 A\x{660}À -+ 9 AÀ\x{660} -+ -+# End PCRE2_EXTRA_ASCII_xxx tests -+ - # End of testinput5 -diff --git a/testdata/testinput7 b/testdata/testinput7 -index 991de885..64a37ad2 100644 ---- a/testdata/testinput7 -+++ b/testdata/testinput7 -@@ -2328,4 +2328,137 @@ - - # End caseless restrict tests - -+# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without. -+ -+# DIGITS -+ -+/\d+/i,utf -+ 123\x{660}456 -+ -+/\d+/i,utf,ucp -+ 123\x{660}456 -+ -+/\d+/i,utf,ucp,ascii_bsd -+ 123\x{660}456 -+ -+/[\d]+/i,utf -+ 123\x{660}456 -+ -+/[\d]+/i,utf,ucp -+ 123\x{660}456 -+ -+/[\d]+/i,utf,ucp,ascii_bsd -+ 123\x{660}456 -+ -+/\d(?aD)\d(?-aD)\d/utf,ucp -+ \x{660}9\x{660} -+\= Expect no match -+ \x{660}\x{660}\x{660} -+ -+/\d(?a)\d(?-a)\d/utf,ucp -+ \x{660}9\x{660} -+\= Expect no match -+ \x{660}\x{660}\x{660} -+ -+# SPACES -+ -+/>\s+ < -+\= Expect no match -+ >\x{a0} < -+ -+/>\s+ < -+ >\x{a0} < -+ -+/>\s+ < -+\= Expect no match -+ >\x{a0} < -+ -+/>[\s]+ < -+\= Expect no match -+ >\x{a0} < -+ -+/>[\s]+ < -+ >\x{a0} < -+ -+/>[\s]+ < -+\= Expect no match -+ >\x{a0} < -+ -+/>\s(?aS)\s(?-aS)\s\x{a0} \x{a0}< -+\= Expect no match -+ >\x{a0}\x{a0}\x{a0}< -+ -+/>\s(?a)\s(?-a)\s\x{a0} \x{a0}< -+\= Expect no match -+ >\x{a0}\x{a0}\x{a0}< -+ -+# WORDS -+ -+/\w+/i,utf -+ 123\x{660}abc -+ -+/\w+/i,utf,ucp -+ 123\x{660}abc -+ -+/\w+/i,utf,ucp,ascii_bsw -+ 123\x{660}abc -+ -+/[\w]+/i,utf -+ 123\x{660}abc -+ -+/[\w]+/i,utf,ucp -+ 123\x{660}abc -+ -+/[\w]+/i,utf,ucp,ascii_bsw -+ 123\x{660}abc -+ -+/\w(?aW)\w(?-aW)\w/utf,ucp -+ \x{660}A\x{c0} -+\= Expect no match -+ \x{660}\x{c0}\x{c0} -+ -+/\w(?a)\w(?-a)\w/utf,ucp -+ \x{660}A\x{c0} -+\= Expect no match -+ \x{660}\x{c0}\x{c0} -+ -+# POSIX -+ -+/[[:digit:]]+/utf,ucp -+ 123\x{660}456 -+ -+/[[:digit:]]+/utf,ucp,ascii_posix -+ 123\x{660}456 -+ -+/>[[:space:]]+\x{a0} \x{a0}< -+ >\x{a0}\x{a0}\x{a0}< -+ -+/>[[:space:]]+\x{a0} \x{a0}< -+ -+/(?aP)[[:alnum:]]+/i,ucp,utf -+ abcáxyz -+ abc\x{660}xyz -+ -+/(?aP)[[:alnum:]\d]+/i,ucp,utf -+ abc\x{660}xyz -+ -+# VARIOUS -+ -+/[\d\s\w]+/a,ucp,utf -+ 9 A\x{660}À -+ 9 AÀ\x{660} -+ -+# End PCRE2_EXTRA_ASCII_xxx tests -+ - # End of testinput7 -diff --git a/testdata/testoutput5 b/testdata/testoutput5 -index db42a117..26972f70 100644 ---- a/testdata/testoutput5 -+++ b/testdata/testoutput5 -@@ -5196,4 +5196,183 @@ No match - - # End caseless restrict tests - -+# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without. -+ -+# DIGITS -+ -+/\d+/i,utf -+ 123\x{660}456 -+ 0: 123 -+ -+/\d+/i,utf,ucp -+ 123\x{660}456 -+ 0: 123\x{660}456 -+ -+/\d+/i,utf,ucp,ascii_bsd -+ 123\x{660}456 -+ 0: 123 -+ -+/[\d]+/i,utf -+ 123\x{660}456 -+ 0: 123 -+ -+/[\d]+/i,utf,ucp -+ 123\x{660}456 -+ 0: 123\x{660}456 -+ -+/[\d]+/i,utf,ucp,ascii_bsd -+ 123\x{660}456 -+ 0: 123 -+ -+/\d(?aD)\d(?-aD)\d/utf,ucp -+ \x{660}9\x{660} -+ 0: \x{660}9\x{660} -+\= Expect no match -+ \x{660}\x{660}\x{660} -+No match -+ -+/\d(?a)\d(?-a)\d/utf,ucp -+ \x{660}9\x{660} -+ 0: \x{660}9\x{660} -+\= Expect no match -+ \x{660}\x{660}\x{660} -+No match -+ -+# SPACES -+ -+/>\s+ < -+ 0: > < -+\= Expect no match -+ >\x{a0} < -+No match -+ -+/>\s+ < -+ 0: > < -+ >\x{a0} < -+ 0: >\x{a0} < -+ -+/>\s+ < -+ 0: > < -+\= Expect no match -+ >\x{a0} < -+No match -+ -+/>[\s]+ < -+ 0: > < -+\= Expect no match -+ >\x{a0} < -+No match -+ -+/>[\s]+ < -+ 0: > < -+ >\x{a0} < -+ 0: >\x{a0} < -+ -+/>[\s]+ < -+ 0: > < -+\= Expect no match -+ >\x{a0} < -+No match -+ -+/>\s(?aS)\s(?-aS)\s\x{a0} \x{a0}< -+ 0: >\x{a0} \x{a0}< -+\= Expect no match -+ >\x{a0}\x{a0}\x{a0}< -+No match -+ -+/>\s(?a)\s(?-a)\s\x{a0} \x{a0}< -+ 0: >\x{a0} \x{a0}< -+\= Expect no match -+ >\x{a0}\x{a0}\x{a0}< -+No match -+ -+# WORDS -+ -+/\w+/i,utf -+ 123\x{660}abc -+ 0: 123 -+ -+/\w+/i,utf,ucp -+ 123\x{660}abc -+ 0: 123\x{660}abc -+ -+/\w+/i,utf,ucp,ascii_bsw -+ 123\x{660}abc -+ 0: 123 -+ -+/[\w]+/i,utf -+ 123\x{660}abc -+ 0: 123 -+ -+/[\w]+/i,utf,ucp -+ 123\x{660}abc -+ 0: 123\x{660}abc -+ -+/[\w]+/i,utf,ucp,ascii_bsw -+ 123\x{660}abc -+ 0: 123 -+ -+/\w(?aW)\w(?-aW)\w/utf,ucp -+ \x{660}A\x{c0} -+ 0: \x{660}A\x{c0} -+\= Expect no match -+ \x{660}\x{c0}\x{c0} -+No match -+ -+/\w(?a)\w(?-a)\w/utf,ucp -+ \x{660}A\x{c0} -+ 0: \x{660}A\x{c0} -+\= Expect no match -+ \x{660}\x{c0}\x{c0} -+No match -+ -+# POSIX -+ -+/[[:digit:]]+/utf,ucp -+ 123\x{660}456 -+ 0: 123\x{660}456 -+ -+/[[:digit:]]+/utf,ucp,ascii_posix -+ 123\x{660}456 -+ 0: 123 -+ -+/>[[:space:]]+\x{a0} \x{a0}< -+ 0: >\x{a0} \x{a0}< -+ >\x{a0}\x{a0}\x{a0}< -+ 0: >\x{a0}\x{a0}\x{a0}< -+ -+/>[[:space:]]+\x{a0} \x{a0}< -+No match -+ -+/(?aP)[[:alnum:]]+/i,ucp,utf -+ abcáxyz -+ 0: abc -+ abc\x{660}xyz -+ 0: abc -+ -+/(?aP)[[:alnum:]\d]+/i,ucp,utf -+ abc\x{660}xyz -+ 0: abc\x{660}xyz -+ -+# VARIOUS -+ -+/[\d\s\w]+/a,ucp,utf -+ 9 A\x{660}À -+ 0: 9 A -+ 9 AÀ\x{660} -+ 0: 9 A -+ -+# End PCRE2_EXTRA_ASCII_xxx tests -+ - # End of testinput5 -diff --git a/testdata/testoutput7 b/testdata/testoutput7 -index c2291a10..c830748c 100644 ---- a/testdata/testoutput7 -+++ b/testdata/testoutput7 -@@ -3936,4 +3936,183 @@ No match - - # End caseless restrict tests - -+# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without. -+ -+# DIGITS -+ -+/\d+/i,utf -+ 123\x{660}456 -+ 0: 123 -+ -+/\d+/i,utf,ucp -+ 123\x{660}456 -+ 0: 123\x{660}456 -+ -+/\d+/i,utf,ucp,ascii_bsd -+ 123\x{660}456 -+ 0: 123 -+ -+/[\d]+/i,utf -+ 123\x{660}456 -+ 0: 123 -+ -+/[\d]+/i,utf,ucp -+ 123\x{660}456 -+ 0: 123\x{660}456 -+ -+/[\d]+/i,utf,ucp,ascii_bsd -+ 123\x{660}456 -+ 0: 123 -+ -+/\d(?aD)\d(?-aD)\d/utf,ucp -+ \x{660}9\x{660} -+ 0: \x{660}9\x{660} -+\= Expect no match -+ \x{660}\x{660}\x{660} -+No match -+ -+/\d(?a)\d(?-a)\d/utf,ucp -+ \x{660}9\x{660} -+ 0: \x{660}9\x{660} -+\= Expect no match -+ \x{660}\x{660}\x{660} -+No match -+ -+# SPACES -+ -+/>\s+ < -+ 0: > < -+\= Expect no match -+ >\x{a0} < -+No match -+ -+/>\s+ < -+ 0: > < -+ >\x{a0} < -+ 0: >\x{a0} < -+ -+/>\s+ < -+ 0: > < -+\= Expect no match -+ >\x{a0} < -+No match -+ -+/>[\s]+ < -+ 0: > < -+\= Expect no match -+ >\x{a0} < -+No match -+ -+/>[\s]+ < -+ 0: > < -+ >\x{a0} < -+ 0: >\x{a0} < -+ -+/>[\s]+ < -+ 0: > < -+\= Expect no match -+ >\x{a0} < -+No match -+ -+/>\s(?aS)\s(?-aS)\s\x{a0} \x{a0}< -+ 0: >\x{a0} \x{a0}< -+\= Expect no match -+ >\x{a0}\x{a0}\x{a0}< -+No match -+ -+/>\s(?a)\s(?-a)\s\x{a0} \x{a0}< -+ 0: >\x{a0} \x{a0}< -+\= Expect no match -+ >\x{a0}\x{a0}\x{a0}< -+No match -+ -+# WORDS -+ -+/\w+/i,utf -+ 123\x{660}abc -+ 0: 123 -+ -+/\w+/i,utf,ucp -+ 123\x{660}abc -+ 0: 123\x{660}abc -+ -+/\w+/i,utf,ucp,ascii_bsw -+ 123\x{660}abc -+ 0: 123 -+ -+/[\w]+/i,utf -+ 123\x{660}abc -+ 0: 123 -+ -+/[\w]+/i,utf,ucp -+ 123\x{660}abc -+ 0: 123\x{660}abc -+ -+/[\w]+/i,utf,ucp,ascii_bsw -+ 123\x{660}abc -+ 0: 123 -+ -+/\w(?aW)\w(?-aW)\w/utf,ucp -+ \x{660}A\x{c0} -+ 0: \x{660}A\x{c0} -+\= Expect no match -+ \x{660}\x{c0}\x{c0} -+No match -+ -+/\w(?a)\w(?-a)\w/utf,ucp -+ \x{660}A\x{c0} -+ 0: \x{660}A\x{c0} -+\= Expect no match -+ \x{660}\x{c0}\x{c0} -+No match -+ -+# POSIX -+ -+/[[:digit:]]+/utf,ucp -+ 123\x{660}456 -+ 0: 123\x{660}456 -+ -+/[[:digit:]]+/utf,ucp,ascii_posix -+ 123\x{660}456 -+ 0: 123 -+ -+/>[[:space:]]+\x{a0} \x{a0}< -+ 0: >\x{a0} \x{a0}< -+ >\x{a0}\x{a0}\x{a0}< -+ 0: >\x{a0}\x{a0}\x{a0}< -+ -+/>[[:space:]]+\x{a0} \x{a0}< -+No match -+ -+/(?aP)[[:alnum:]]+/i,ucp,utf -+ abcáxyz -+ 0: abc -+ abc\x{660}xyz -+ 0: abc -+ -+/(?aP)[[:alnum:]\d]+/i,ucp,utf -+ abc\x{660}xyz -+ 0: abc\x{660}xyz -+ -+# VARIOUS -+ -+/[\d\s\w]+/a,ucp,utf -+ 9 A\x{660}À -+ 0: 9 A -+ 9 AÀ\x{660} -+ 0: 9 A -+ -+# End PCRE2_EXTRA_ASCII_xxx tests -+ - # End of testinput7 --- -2.23.0 - diff --git a/backport-Fix-32-bit-quantifier-following-a-character-larger-t.patch b/backport-Fix-32-bit-quantifier-following-a-character-larger-t.patch deleted file mode 100644 index 9a5ad8b71c95406ceac23186061487b6110a9f33..0000000000000000000000000000000000000000 --- a/backport-Fix-32-bit-quantifier-following-a-character-larger-t.patch +++ /dev/null @@ -1,116 +0,0 @@ -From c1306126c3f12c16ad62dd2553132f64a28ca607 Mon Sep 17 00:00:00 2001 -From: Philip Hazel -Date: Sun, 19 Nov 2023 17:18:07 +0000 -Subject: [PATCH] Fix 32-bit quantifier following a character larger than the - maximum UTF character. - ---- - src/pcre2_compile.c | 11 ++++++++--- - testdata/testinput12 | 6 ++++++ - testdata/testoutput12-16 | 7 +++++++ - testdata/testoutput12-32 | 7 +++++++ - 4 files changed, 28 insertions(+), 3 deletions(-) - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index b3e4969..fdaf2ad 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -2781,6 +2781,7 @@ uint32_t *verbstartptr = NULL; - uint32_t *previous_callout = NULL; - uint32_t *parsed_pattern = cb->parsed_pattern; - uint32_t *parsed_pattern_end = cb->parsed_pattern_end; -+uint32_t *this_parsed_item = NULL; - uint32_t meta_quantifier = 0; - uint32_t add_after_mark = 0; - uint32_t extra_options = cb->cx->extra_options; -@@ -2866,10 +2867,11 @@ while (ptr < ptrend) - uint32_t set, unset, *optset; - uint32_t terminator; - uint32_t prev_meta_quantifier; -+ uint32_t *prev_parsed_item = this_parsed_item; - BOOL prev_okquantifier; - PCRE2_SPTR tempptr; - PCRE2_SIZE offset; -- -+ - if (parsed_pattern >= parsed_pattern_end) - { - errorcode = ERR63; /* Internal error (parsed pattern overflow) */ -@@ -2881,6 +2883,10 @@ while (ptr < ptrend) - errorcode = ERR19; - goto FAILED; /* Parentheses too deeply nested */ - } -+ -+ /* Remember where this item started */ -+ -+ this_parsed_item = parsed_pattern; - - /* Get next input character, save its position for callout handling. */ - -@@ -3173,7 +3179,6 @@ while (ptr < ptrend) - continue; /* Next character in pattern */ - } - -- - /* Process the next item in the main part of a pattern. */ - - switch(c) -@@ -3450,7 +3455,7 @@ while (ptr < ptrend) - wrapping it in non-capturing brackets, but we have to allow for a preceding - (*MARK) for when (*ACCEPT) has an argument. */ - -- if (parsed_pattern[-1] == META_ACCEPT) -+ if (*prev_parsed_item == META_ACCEPT) - { - uint32_t *p; - for (p = parsed_pattern - 1; p >= verbstartptr; p--) p[1] = p[0]; -diff --git a/testdata/testinput12 b/testdata/testinput12 -index 7a85eb5..1e552e6 100644 ---- a/testdata/testinput12 -+++ b/testdata/testinput12 -@@ -560,4 +560,10 @@ - - # ---------------------------------------------------- - -+# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This -+# fails in 16-bit mode, but is OK for 32-bit. -+ -+/\x{802a0000}*/ -+ \x{802a0000}\x{802a0000} -+ - # End of testinput12 -diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 -index 9867632..8cbc13d 100644 ---- a/testdata/testoutput12-16 -+++ b/testdata/testoutput12-16 -@@ -1803,4 +1803,11 @@ No match - - # ---------------------------------------------------- - -+# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This -+# fails in 16-bit mode, but is OK for 32-bit. -+ -+/\x{802a0000}*/ -+Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large -+ \x{802a0000}\x{802a0000} -+ - # End of testinput12 -diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 -index 3a20dd4..1a98b4b 100644 ---- a/testdata/testoutput12-32 -+++ b/testdata/testoutput12-32 -@@ -1801,4 +1801,11 @@ No match - - # ---------------------------------------------------- - -+# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This -+# fails in 16-bit mode, but is OK for 32-bit. -+ -+/\x{802a0000}*/ -+ \x{802a0000}\x{802a0000} -+ 0: \x{802a0000}\x{802a0000} -+ - # End of testinput12 --- -2.33.0 - diff --git a/backport-Fix-X-matching-in-32-bit-mode-without-UTF-in-JIT.patch b/backport-Fix-X-matching-in-32-bit-mode-without-UTF-in-JIT.patch deleted file mode 100644 index 3be576672143231b36e11262728b56c651d83f59..0000000000000000000000000000000000000000 --- a/backport-Fix-X-matching-in-32-bit-mode-without-UTF-in-JIT.patch +++ /dev/null @@ -1,91 +0,0 @@ -From 45dcb3de900b77583f4e9daa663004c55fad4794 Mon Sep 17 00:00:00 2001 -From: Zoltan Herczeg -Date: Wed, 22 Nov 2023 10:22:59 +0000 -Subject: [PATCH] Fix \X matching in 32 bit mode without UTF in JIT - ---- - src/pcre2_jit_compile.c | 6 +++--- - testdata/testinput12 | 4 ++++ - testdata/testoutput12-16 | 9 +++++++++ - testdata/testoutput12-32 | 5 +++++ - 4 files changed, 21 insertions(+), 3 deletions(-) - -diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c -index 510c392..8d64e1c 100644 ---- a/src/pcre2_jit_compile.c -+++ b/src/pcre2_jit_compile.c -@@ -8718,7 +8718,7 @@ c = *cc++; - - #if PCRE2_CODE_UNIT_WIDTH == 32 - if (c >= 0x110000) -- return NULL; -+ return cc; - #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ - lgb = UCD_GRAPHBREAK(c); - -@@ -8958,7 +8958,7 @@ switch(type) - #else - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, - common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf)); -- if (!common->utf || common->invalid_utf) -+ if (common->invalid_utf) - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); - #endif - -@@ -12044,7 +12044,7 @@ switch(opcode) - } - - #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -- if (common->utf) -+ if (type == OP_EXTUNI || common->utf) - { - OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); - detect_partial_match(common, &no_match); -diff --git a/testdata/testinput12 b/testdata/testinput12 -index 5a2d8d2..a6678bb 100644 ---- a/testdata/testinput12 -+++ b/testdata/testinput12 -@@ -569,4 +569,8 @@ - /\x{802a0000}*/ - \x{802a0000}\x{802a0000} - -+# UTF matching without UTF, check invalid UTF characters -+/\X++/ -+ a\x{110000}\x{ffffffff} -+ - # End of testinput12 -diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 -index 9ac403e..f3b40a3 100644 ---- a/testdata/testoutput12-16 -+++ b/testdata/testoutput12-16 -@@ -1814,4 +1814,13 @@ No match - Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large - \x{802a0000}\x{802a0000} - -+# UTF matching without UTF, check invalid UTF characters -+/\X++/ -+ a\x{110000}\x{ffffffff} -+** Character \x{110000} is greater than 0xffff and UTF-16 mode is not enabled. -+** Truncation will probably give the wrong result. -+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. -+** Truncation will probably give the wrong result. -+ 0: a\x00\x{ffff} -+ - # End of testinput12 -diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 -index 9396305..dd42f86 100644 ---- a/testdata/testoutput12-32 -+++ b/testdata/testoutput12-32 -@@ -1812,4 +1812,9 @@ No match - \x{802a0000}\x{802a0000} - 0: \x{802a0000}\x{802a0000} - -+# UTF matching without UTF, check invalid UTF characters -+/\X++/ -+ a\x{110000}\x{ffffffff} -+ 0: a\x{110000}\x{ffffffff} -+ - # End of testinput12 --- -2.33.0 - diff --git a/backport-Fix-accept-and-endanchored-interaction-in-JIT.patch b/backport-Fix-accept-and-endanchored-interaction-in-JIT.patch deleted file mode 100644 index 1cbfa0e968738257a941bea058b530b6baf8ba09..0000000000000000000000000000000000000000 --- a/backport-Fix-accept-and-endanchored-interaction-in-JIT.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 1c09efe6b0008a3b463299efe7501bc3140806f3 Mon Sep 17 00:00:00 2001 -From: Zoltan Herczeg -Date: Wed, 6 Dec 2023 10:06:50 +0000 -Subject: [PATCH] Fix accept and endanchored interaction in JIT - ---- - src/pcre2_jit_compile.c | 15 ++++++++++++--- - src/pcre2_jit_test.c | 1 + - 2 files changed, 13 insertions(+), 3 deletions(-) - -diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c -index 2e11c3c..849e2c8 100644 ---- a/src/pcre2_jit_compile.c -+++ b/src/pcre2_jit_compile.c -@@ -489,6 +489,8 @@ typedef struct compiler_common { - jump_list *casefulcmp; - jump_list *caselesscmp; - jump_list *reset_match; -+ /* Same as reset_match, but resets the STR_PTR as well. */ -+ jump_list *restart_match; - BOOL unset_backref; - BOOL alt_circumflex; - #ifdef SUPPORT_UNICODE -@@ -3146,7 +3148,7 @@ return (value & (value - 1)) == 0; - - static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label) - { --while (list) -+while (list != NULL) - { - /* sljit_set_label is clever enough to do nothing - if either the jump or the label is NULL. */ -@@ -12187,7 +12189,7 @@ if (*cc == OP_FAIL) - } - - if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0) -- add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); -+ add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); - - if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty) - { -@@ -14552,10 +14554,17 @@ if (common->caselesscmp != NULL) - set_jumps(common->caselesscmp, LABEL()); - do_caselesscmp(common); - } --if (common->reset_match != NULL) -+if (common->reset_match != NULL || common->restart_match != NULL) - { -+ if (common->restart_match != NULL) -+ { -+ set_jumps(common->restart_match, LABEL()); -+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); -+ } -+ - set_jumps(common->reset_match, LABEL()); - do_reset_match(common, (re->top_bracket + 1) * 2); -+ /* The value of restart_match is in TMP1. */ - CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label); - OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0); - JUMPTO(SLJIT_JUMP, reset_match_label); -diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c -index b5d95d5..0974d19 100644 ---- a/src/pcre2_jit_test.c -+++ b/src/pcre2_jit_test.c -@@ -655,6 +655,7 @@ static struct regression_test_case regression_test_cases[] = { - { MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" }, - { MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" }, - { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" }, -+ { MU | PCRE2_ENDANCHORED, A, 0, 0, "aa(*ACCEPT)aa", "aaa" }, - - /* Conditional blocks. */ - { MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" }, --- -2.33.0 - diff --git a/backport-Fix-an-invalid-match-of-ascii-word-classes-when-inva.patch b/backport-Fix-an-invalid-match-of-ascii-word-classes-when-inva.patch deleted file mode 100644 index 82d04e2bfe581d9b28b204264f5a1ad67a64bee4..0000000000000000000000000000000000000000 --- a/backport-Fix-an-invalid-match-of-ascii-word-classes-when-inva.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 2c08b619dc973beacc474dcb67cda8cd366200ce Mon Sep 17 00:00:00 2001 -From: Zoltan Herczeg -Date: Tue, 11 Apr 2023 12:42:11 +0000 -Subject: [PATCH] Fix an invalid match of ascii word classes when invalid utf - is enabled - -Fixes #224 -Conflict:delete changelog -Reference:https://github.com/PCRE2Project/pcre2/commit/2c08b619dc973beacc474dcb67cda8cd366200ce ---- - src/pcre2_jit_compile.c | 1 + - src/pcre2_jit_test.c | 1 + - 2 files changed, 2 insertions(+) - -diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c -index 75ba610..81b7a93 100644 ---- a/src/pcre2_jit_compile.c -+++ b/src/pcre2_jit_compile.c -@@ -4132,6 +4132,7 @@ if (negated) - - if (common->invalid_utf) - { -+ OP1(SLJIT_MOV, TMP1, 0, TMP2, 0); - add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR)); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); -diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c -index e1f0bbc..1a1f6c5 100644 ---- a/src/pcre2_jit_test.c -+++ b/src/pcre2_jit_test.c -@@ -1979,6 +1979,7 @@ static const struct invalid_utf8_regression_test_case invalid_utf8_regression_te - { PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" }, - - { PCRE2_UTF | PCRE2_UCP, CI, 0, 0, 0, -1, -1, { "[\\s]", NULL }, "\xed\xa0\x80" }, -+ { PCRE2_UTF, CI, 0, 0, 0, 1, 4, { "[\\D]", NULL }, "@\xe0\xab\xaa@" }, - - /* These two are not invalid UTF tests, but this infrastructure fits better for them. */ - { 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" }, --- -2.27.0 - diff --git a/backport-Fix-another-oversight-in-c1306126.patch b/backport-Fix-another-oversight-in-c1306126.patch deleted file mode 100644 index 1cfdd577e8c3604e0e3cb807cf2ddfad32d96952..0000000000000000000000000000000000000000 --- a/backport-Fix-another-oversight-in-c1306126.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 04f6668a09c51cf10fa5514019843ab0af9724c8 Mon Sep 17 00:00:00 2001 -From: Philip Hazel -Date: Tue, 21 Nov 2023 15:10:34 +0000 -Subject: [PATCH] Fix another oversight in c1306126 - ---- - src/pcre2_compile.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index 9e45580..7b522c5 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -3108,8 +3108,11 @@ while (ptr < ptrend) - !read_repeat_counts(&tempptr, ptrend, NULL, NULL, &errorcode)))) - { - if (after_manual_callout-- <= 0) -+ { - parsed_pattern = manage_callouts(thisptr, &previous_callout, auto_callout, - parsed_pattern, cb); -+ this_parsed_item = parsed_pattern; /* New start for current item */ -+ } - } - - /* If expect_cond_assert is 2, we have just passed (?( and are expecting an --- -2.33.0 - diff --git a/backport-Fix-backref-iterators-when-PCRE2_MATCH_UNSET_BACKREF.patch b/backport-Fix-backref-iterators-when-PCRE2_MATCH_UNSET_BACKREF.patch deleted file mode 100644 index 565286d172b70ed00cf2f21e2e5c08280f94c72a..0000000000000000000000000000000000000000 --- a/backport-Fix-backref-iterators-when-PCRE2_MATCH_UNSET_BACKREF.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 936fef2a4480b21f5c43b207181097736fb311e3 Mon Sep 17 00:00:00 2001 -From: Zoltan Herczeg -Date: Wed, 22 Nov 2023 11:50:38 +0000 -Subject: [PATCH] Fix backref iterators when PCRE2_MATCH_UNSET_BACKREF is set - in JIT - ---- - src/pcre2_jit_compile.c | 4 +++- - src/pcre2_jit_test.c | 1 + - 2 files changed, 4 insertions(+), 1 deletion(-) - -diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c -index 8d64e1c..8110d8c 100644 ---- a/src/pcre2_jit_compile.c -+++ b/src/pcre2_jit_compile.c -@@ -9539,9 +9539,11 @@ if (!minimize) - if (ref) - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); -+ - if (ref) - { -- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); -+ if (!common->unset_backref) -+ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); - zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); - } - else -diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c -index b27cec7..8bff3dc 100644 ---- a/src/pcre2_jit_test.c -+++ b/src/pcre2_jit_test.c -@@ -595,6 +595,7 @@ static struct regression_test_case regression_test_cases[] = { - { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?AA)|(?BB))\\k{1,3}M", "aaaaaaaabbbbaabbbbm" }, - { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?AA)|(?BB))\\k{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" }, - { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?AA)|(?BB))\\k{2,3}?", "aaaabbbbaaaabbbbbbbbbb" }, -+ { MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" }, - - /* Assertions. */ - { MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" }, --- -2.33.0 - diff --git a/backport-Fix-backreferences-with-unset-backref-and-non-greedy.patch b/backport-Fix-backreferences-with-unset-backref-and-non-greedy.patch deleted file mode 100644 index 1969d6ac3d754cdf79596c2aede4297d1909d6af..0000000000000000000000000000000000000000 --- a/backport-Fix-backreferences-with-unset-backref-and-non-greedy.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 9de4d53cf850e0fca625ce9d80c12bea5b2a5ab9 Mon Sep 17 00:00:00 2001 -From: Zoltan Herczeg -Date: Thu, 7 Dec 2023 09:03:24 +0000 -Subject: [PATCH] Fix backreferences with unset backref and non-greedy - iterators in JIT - ---- - src/pcre2_jit_compile.c | 3 ++- - src/pcre2_jit_test.c | 1 + - 2 files changed, 3 insertions(+), 1 deletion(-) - -diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c -index 0f445e1..e1daa1e 100644 ---- a/src/pcre2_jit_compile.c -+++ b/src/pcre2_jit_compile.c -@@ -9653,7 +9653,8 @@ else - { - if (ref) - { -- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); -+ if (!common->unset_backref) -+ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); - zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); - } - else -diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c -index 0974d19..9b63c8e 100644 ---- a/src/pcre2_jit_test.c -+++ b/src/pcre2_jit_test.c -@@ -596,6 +596,7 @@ static struct regression_test_case regression_test_cases[] = { - { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?AA)|(?BB))\\k{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" }, - { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?AA)|(?BB))\\k{2,3}?", "aaaabbbbaaaabbbbbbbbbb" }, - { MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" }, -+ { MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\1+?()", "" }, - - /* Assertions. */ - { MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" }, --- -2.33.0 - diff --git a/backport-Fix-bad-patch-in-05206d66.-The-interpreter-was-handl.patch b/backport-Fix-bad-patch-in-05206d66.-The-interpreter-was-handl.patch deleted file mode 100644 index b4dd80a9d79f8feb0e9d9ef49028ba4c73b6970b..0000000000000000000000000000000000000000 --- a/backport-Fix-bad-patch-in-05206d66.-The-interpreter-was-handl.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 57ee073252dc826dbe412846a83421d2bb4483bc Mon Sep 17 00:00:00 2001 -From: Philip Hazel -Date: Wed, 22 Nov 2023 11:34:27 +0000 -Subject: [PATCH] Fix bad patch in 05206d66. The interpreter was handling - NOTEOL incorrectly in general after trying to fix it in invalid UTF subjects. - ---- - src/pcre2_intmodedep.h | 3 ++- - src/pcre2_match.c | 7 +++---- - testdata/testinput2 | 4 ++++ - testdata/testoutput2 | 6 ++++++ - 4 files changed, 15 insertions(+), 5 deletions(-) - -diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h -index 5e7e10d..423764d 100644 ---- a/src/pcre2_intmodedep.h -+++ b/src/pcre2_intmodedep.h -@@ -880,7 +880,8 @@ typedef struct match_block { - PCRE2_SPTR start_code; /* For use when recursing */ - PCRE2_SPTR start_subject; /* Start of the subject string */ - PCRE2_SPTR check_subject; /* Where UTF-checked from */ -- PCRE2_SPTR end_subject; /* End of the subject string */ -+ PCRE2_SPTR end_subject; /* Usable end of the subject string */ -+ PCRE2_SPTR true_end_subject; /* Actual end of the subject string */ - PCRE2_SPTR end_match_ptr; /* Subject position at end match */ - PCRE2_SPTR start_used_ptr; /* Earliest consulted character */ - PCRE2_SPTR last_used_ptr; /* Latest consulted character */ -diff --git a/src/pcre2_match.c b/src/pcre2_match.c -index ea03976..c5e84ce 100644 ---- a/src/pcre2_match.c -+++ b/src/pcre2_match.c -@@ -6076,12 +6076,10 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, - if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS; - - /* Fall through */ -- /* Unconditional end of subject assertion (\z). We must check NOTEOL -- because it gets set for invalid UTF fragments. */ -+ /* Unconditional end of subject assertion (\z). */ - - case OP_EOD: -- if (Feptr < mb->end_subject || (mb->moptions & PCRE2_NOTEOL) != 0) -- RRETURN(MATCH_NOMATCH); -+ if (Feptr < mb->true_end_subject) RRETURN(MATCH_NOMATCH); - if (mb->partial != 0) - { - mb->hitend = TRUE; -@@ -6891,6 +6889,7 @@ mb->callout_data = mcontext->callout_data; - mb->start_subject = subject; - mb->start_offset = start_offset; - mb->end_subject = end_subject; -+mb->true_end_subject = true_end_subject; - mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0; - mb->allowemptypartial = (re->max_lookbehind > 0) || - (re->flags & PCRE2_MATCH_EMPTY) != 0; -diff --git a/testdata/testinput2 b/testdata/testinput2 -index 0e24e78..b874f20 100644 ---- a/testdata/testinput2 -+++ b/testdata/testinput2 -@@ -6055,4 +6055,8 @@ a)"xI - - /A +/extended - -+/a\z/ -+ a -+ a\=noteol -+ - # End of testinput2 -diff --git a/testdata/testoutput2 b/testdata/testoutput2 -index 68800fb..c1bc0e6 100644 ---- a/testdata/testoutput2 -+++ b/testdata/testoutput2 -@@ -17946,6 +17946,12 @@ No match - - /A +/extended - -+/a\z/ -+ a -+ 0: a -+ a\=noteol -+ 0: a -+ - # End of testinput2 - Error -70: PCRE2_ERROR_BADDATA (unknown error number) - Error -62: bad serialized data --- -2.33.0 - diff --git a/backport-Fix-compile-loop-in-32-bit-mode-for-characters-above.patch b/backport-Fix-compile-loop-in-32-bit-mode-for-characters-above.patch deleted file mode 100644 index 9f6368703921120045572f075abed1d47c8af6b4..0000000000000000000000000000000000000000 --- a/backport-Fix-compile-loop-in-32-bit-mode-for-characters-above.patch +++ /dev/null @@ -1,77 +0,0 @@ -From afce00e484cff118a824dac498e8044680dac401 Mon Sep 17 00:00:00 2001 -From: Philip Hazel -Date: Fri, 1 Dec 2023 16:49:59 +0000 -Subject: [PATCH] Fix compile loop in 32-bit mode for characters above the - Unicode limit when caseless and ucp are set. - ---- - src/pcre2_compile.c | 6 +++++- - testdata/testinput12 | 4 ++++ - testdata/testoutput12-16 | 5 +++++ - testdata/testoutput12-32 | 5 +++++ - 4 files changed, 19 insertions(+), 1 deletion(-) - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index 4a4fab1..3e4014b 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -4954,10 +4954,14 @@ uint32_t c, othercase, next; - unsigned int co; - - /* Find the first character that has an other case. If it has multiple other --cases, return its case offset value. */ -+cases, return its case offset value. In 32-bit mode, a value -+greater than the Unicode maximum ends the range. */ - - for (c = *cptr; c <= d; c++) - { -+#if PCRE2_CODE_UNIT_WIDTH == 32 -+ if (c > MAX_UTF_CODE_POINT) return -1; -+#endif - if ((co = UCD_CASESET(c)) != 0) - { - *ocptr = c++; /* Character that has the set */ -diff --git a/testdata/testinput12 b/testdata/testinput12 -index a6678bb..de3d406 100644 ---- a/testdata/testinput12 -+++ b/testdata/testinput12 -@@ -573,4 +573,8 @@ - /\X++/ - a\x{110000}\x{ffffffff} - -+# This used to loop in 32-bit mode; it will fail in 16-bit mode. -+/[\x{ffffffff}]/caseless,ucp -+ \x{ffffffff}xyz -+ - # End of testinput12 -diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 -index f3b40a3..9fa93fa 100644 ---- a/testdata/testoutput12-16 -+++ b/testdata/testoutput12-16 -@@ -1823,4 +1823,9 @@ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is to - ** Truncation will probably give the wrong result. - 0: a\x00\x{ffff} - -+# This used to loop in 32-bit mode; it will fail in 16-bit mode. -+/[\x{ffffffff}]/caseless,ucp -+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large -+ \x{ffffffff}xyz -+ - # End of testinput12 -diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 -index dd42f86..721d8bc 100644 ---- a/testdata/testoutput12-32 -+++ b/testdata/testoutput12-32 -@@ -1817,4 +1817,9 @@ No match - a\x{110000}\x{ffffffff} - 0: a\x{110000}\x{ffffffff} - -+# This used to loop in 32-bit mode; it will fail in 16-bit mode. -+/[\x{ffffffff}]/caseless,ucp -+ \x{ffffffff}xyz -+ 0: \x{ffffffff} -+ - # End of testinput12 --- -2.33.0 - diff --git a/backport-Fix-incorrect-class-character-matches-in-JIT.patch b/backport-Fix-incorrect-class-character-matches-in-JIT.patch deleted file mode 100644 index 7532dc25b77431ea1768b608bb456922e70dccaf..0000000000000000000000000000000000000000 --- a/backport-Fix-incorrect-class-character-matches-in-JIT.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 542cb11242cfc9be9b6218965751bfbb13a8b6a2 Mon Sep 17 00:00:00 2001 -From: Zoltan Herczeg -Date: Wed, 27 Dec 2023 08:27:17 +0000 -Subject: [PATCH] Fix incorrect class character matches in JIT - ---- - src/pcre2_jit_compile.c | 8 ++++++++ - src/pcre2_jit_test.c | 1 + - 2 files changed, 9 insertions(+) - -diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c -index c8a51da..f612574 100644 ---- a/src/pcre2_jit_compile.c -+++ b/src/pcre2_jit_compile.c -@@ -8170,6 +8170,7 @@ while (*cc != XCL_END) - - jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); - -+ c = charoffset; - /* In case of ucp_Cf, we overwrite the result. */ - SET_CHAR_OFFSET(0x2066); - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); -@@ -8181,6 +8182,9 @@ while (*cc != XCL_END) - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - -+ /* Restore charoffset. */ -+ SET_CHAR_OFFSET(c); -+ - JUMPHERE(jump); - jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); - break; -@@ -8196,6 +8200,7 @@ while (*cc != XCL_END) - - jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); - -+ c = charoffset; - /* In case of ucp_Cf, we overwrite the result. */ - SET_CHAR_OFFSET(0x2066); - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); -@@ -8204,6 +8209,9 @@ while (*cc != XCL_END) - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - -+ /* Restore charoffset. */ -+ SET_CHAR_OFFSET(c); -+ - JUMPHERE(jump); - jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); - break; -diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c -index e4dace8..f2c714b 100644 ---- a/src/pcre2_jit_test.c -+++ b/src/pcre2_jit_test.c -@@ -417,6 +417,7 @@ static struct regression_test_case regression_test_cases[] = { - { MUP, 0, 0, 0, "[\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" }, - { MUP, 0, 0, 0, "[\\x{a92e}\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" }, - { CMUP, 0, 0, 0, "[^S]\\B", "\xe2\x80\x8a" }, -+ { MUP, 0, 0, 0 | F_NOMATCH, "[^[:print:]\\x{f6f6}]", "\xef\x9b\xb6" }, - - /* Possible empty brackets. */ - { MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" }, --- -2.33.0 - diff --git a/backport-Fix-incorrect-matching-of-0xffffffff-to-any-characte.patch b/backport-Fix-incorrect-matching-of-0xffffffff-to-any-characte.patch deleted file mode 100644 index 2a432870f69171178337a1cd288644f498fa0bb5..0000000000000000000000000000000000000000 --- a/backport-Fix-incorrect-matching-of-0xffffffff-to-any-characte.patch +++ /dev/null @@ -1,457 +0,0 @@ -From ad73148dfb6d06280a4d87f322991762aff90a55 Mon Sep 17 00:00:00 2001 -From: Philip Hazel -Date: Mon, 4 Dec 2023 16:11:41 +0000 -Subject: [PATCH] Fix incorrect matching of 0xffffffff to any character with - more than one other case in 32-bit UCP (but not UTF) mode. - ---- - src/pcre2_dfa_match.c | 28 ++++++++++++++++++++++++++ - src/pcre2_match.c | 43 ++++++++++++++++++++++++++++++++++------ - testdata/testinput12 | 26 ++++++++++++++++++++++++ - testdata/testinput14 | 27 +++++++++++++++++++++++++ - testdata/testoutput12-16 | 37 ++++++++++++++++++++++++++++++++++ - testdata/testoutput12-32 | 33 ++++++++++++++++++++++++++++++ - testdata/testoutput14-16 | 38 +++++++++++++++++++++++++++++++++++ - testdata/testoutput14-32 | 34 +++++++++++++++++++++++++++++++ - testdata/testoutput14-8 | 38 +++++++++++++++++++++++++++++++++++ - 9 files changed, 298 insertions(+), 6 deletions(-) - -diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c -index 1c48ad6..caae652 100644 ---- a/src/pcre2_dfa_match.c -+++ b/src/pcre2_dfa_match.c -@@ -1241,6 +1241,13 @@ for (;;) - break; - - case PT_CLIST: -+#if PCRE2_CODE_UNIT_WIDTH == 32 -+ if (c > MAX_UTF_CODE_POINT) -+ { -+ OK = FALSE; -+ break; -+ } -+#endif - cp = PRIV(ucd_caseless_sets) + code[2]; - for (;;) - { -@@ -1516,6 +1523,13 @@ for (;;) - break; - - case PT_CLIST: -+#if PCRE2_CODE_UNIT_WIDTH == 32 -+ if (c > MAX_UTF_CODE_POINT) -+ { -+ OK = FALSE; -+ break; -+ } -+#endif - cp = PRIV(ucd_caseless_sets) + code[3]; - for (;;) - { -@@ -1774,6 +1788,13 @@ for (;;) - break; - - case PT_CLIST: -+#if PCRE2_CODE_UNIT_WIDTH == 32 -+ if (c > MAX_UTF_CODE_POINT) -+ { -+ OK = FALSE; -+ break; -+ } -+#endif - cp = PRIV(ucd_caseless_sets) + code[3]; - for (;;) - { -@@ -2058,6 +2079,13 @@ for (;;) - break; - - case PT_CLIST: -+#if PCRE2_CODE_UNIT_WIDTH == 32 -+ if (c > MAX_UTF_CODE_POINT) -+ { -+ OK = FALSE; -+ break; -+ } -+#endif - cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2]; - for (;;) - { -diff --git a/src/pcre2_match.c b/src/pcre2_match.c -index d162e70..b2e1f23 100644 ---- a/src/pcre2_match.c -+++ b/src/pcre2_match.c -@@ -2565,6 +2565,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, - break; - - case PT_CLIST: -+#if PCRE2_CODE_UNIT_WIDTH == 32 -+ if (fc > MAX_UTF_CODE_POINT) -+ { -+ if (notmatch) break;; -+ RRETURN(MATCH_NOMATCH); -+ } -+#endif - cp = PRIV(ucd_caseless_sets) + Fecode[2]; - for (;;) - { -@@ -2885,6 +2892,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(fc, Feptr); -+#if PCRE2_CODE_UNIT_WIDTH == 32 -+ if (fc > MAX_UTF_CODE_POINT) -+ { -+ if (notmatch) continue; -+ RRETURN(MATCH_NOMATCH); -+ } -+#endif - cp = PRIV(ucd_caseless_sets) + Lpropvalue; - for (;;) - { -@@ -3698,6 +3712,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(fc, Feptr); -+#if PCRE2_CODE_UNIT_WIDTH == 32 -+ if (fc > MAX_UTF_CODE_POINT) -+ { -+ if (Lctype == OP_NOTPROP) continue; -+ RRETURN(MATCH_NOMATCH); -+ } -+#endif - cp = PRIV(ucd_caseless_sets) + Lpropvalue; - for (;;) - { -@@ -4278,14 +4299,24 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, - break; - } - GETCHARLENTEST(fc, Feptr, len); -- cp = PRIV(ucd_caseless_sets) + Lpropvalue; -- for (;;) -+#if PCRE2_CODE_UNIT_WIDTH == 32 -+ if (fc > MAX_UTF_CODE_POINT) - { -- if (fc < *cp) -- { if (notmatch) break; else goto GOT_MAX; } -- if (fc == *cp++) -- { if (notmatch) goto GOT_MAX; else break; } -+ if (!notmatch) goto GOT_MAX; - } -+ else -+#endif -+ { -+ cp = PRIV(ucd_caseless_sets) + Lpropvalue; -+ for (;;) -+ { -+ if (fc < *cp) -+ { if (notmatch) break; else goto GOT_MAX; } -+ if (fc == *cp++) -+ { if (notmatch) goto GOT_MAX; else break; } -+ } -+ } -+ - Feptr += len; - } - GOT_MAX: -diff --git a/testdata/testinput12 b/testdata/testinput12 -index de3d406..85550c3 100644 ---- a/testdata/testinput12 -+++ b/testdata/testinput12 -@@ -576,5 +576,31 @@ - # This used to loop in 32-bit mode; it will fail in 16-bit mode. - /[\x{ffffffff}]/caseless,ucp - \x{ffffffff}xyz -+ -+# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They -+# will give errors in 16-bit mode. -+ -+/k*\x{ffffffff}/caseless,ucp -+ \x{ffffffff} -+ -+/k+\x{ffffffff}/caseless,ucp,no_start_optimize -+ K\x{ffffffff} -+\= Expect no match -+ \x{ffffffff}\x{ffffffff} -+ -+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize -+\= Expect no match -+ \x{ffffffff}\x{ffffffff}\x{ffffffff} -+ -+/k\x{ffffffff}/caseless,ucp,no_start_optimize -+ K\x{ffffffff} -+\= Expect no match -+ \x{ffffffff}\x{ffffffff}\x{ffffffff} -+ -+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess -+\= Expect no match -+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z -+ -+# --------------------------------------------------------- - - # End of testinput12 -diff --git a/testdata/testinput14 b/testdata/testinput14 -index 8a17ae7..8880b5c 100644 ---- a/testdata/testinput14 -+++ b/testdata/testinput14 -@@ -78,4 +78,31 @@ - - # ---------------------------------------------------- - -+# ---------------------------------------------------- -+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit -+# mode; for the other widths they will fail. -+ -+/k*\x{ffffffff}/caseless,ucp -+ \x{ffffffff} -+ -+/k+\x{ffffffff}/caseless,ucp,no_start_optimize -+ K\x{ffffffff} -+\= Expect no match -+ \x{ffffffff}\x{ffffffff} -+ -+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize -+\= Expect no match -+ \x{ffffffff}\x{ffffffff}\x{ffffffff} -+ -+/k\x{ffffffff}/caseless,ucp,no_start_optimize -+ K\x{ffffffff} -+\= Expect no match -+ \x{ffffffff}\x{ffffffff}\x{ffffffff} -+ -+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess -+\= Expect no match -+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z -+ -+# ---------------------------------------------------- -+ - # End of testinput14 -diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 -index 9fa93fa..616d693 100644 ---- a/testdata/testoutput12-16 -+++ b/testdata/testoutput12-16 -@@ -1827,5 +1827,42 @@ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is to - /[\x{ffffffff}]/caseless,ucp - Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large - \x{ffffffff}xyz -+ -+# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They -+# will give errors in 16-bit mode. -+ -+/k*\x{ffffffff}/caseless,ucp -+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large -+ \x{ffffffff} -+ -+/k+\x{ffffffff}/caseless,ucp,no_start_optimize -+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large -+ K\x{ffffffff} -+\= Expect no match -+ \x{ffffffff}\x{ffffffff} -+ -+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize -+Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large -+\= Expect no match -+ \x{ffffffff}\x{ffffffff}\x{ffffffff} -+ -+/k\x{ffffffff}/caseless,ucp,no_start_optimize -+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large -+ K\x{ffffffff} -+\= Expect no match -+ \x{ffffffff}\x{ffffffff}\x{ffffffff} -+ -+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess -+\= Expect no match -+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z -+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. -+** Truncation will probably give the wrong result. -+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. -+** Truncation will probably give the wrong result. -+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. -+** Truncation will probably give the wrong result. -+No match -+ -+# --------------------------------------------------------- - - # End of testinput12 -diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 -index 721d8bc..3c9586e 100644 ---- a/testdata/testoutput12-32 -+++ b/testdata/testoutput12-32 -@@ -1821,5 +1821,38 @@ No match - /[\x{ffffffff}]/caseless,ucp - \x{ffffffff}xyz - 0: \x{ffffffff} -+ -+# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They -+# will give errors in 16-bit mode. -+ -+/k*\x{ffffffff}/caseless,ucp -+ \x{ffffffff} -+ 0: \x{ffffffff} -+ -+/k+\x{ffffffff}/caseless,ucp,no_start_optimize -+ K\x{ffffffff} -+ 0: K\x{ffffffff} -+\= Expect no match -+ \x{ffffffff}\x{ffffffff} -+No match -+ -+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize -+\= Expect no match -+ \x{ffffffff}\x{ffffffff}\x{ffffffff} -+No match -+ -+/k\x{ffffffff}/caseless,ucp,no_start_optimize -+ K\x{ffffffff} -+ 0: K\x{ffffffff} -+\= Expect no match -+ \x{ffffffff}\x{ffffffff}\x{ffffffff} -+No match -+ -+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess -+\= Expect no match -+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z -+No match -+ -+# --------------------------------------------------------- - - # End of testinput12 -diff --git a/testdata/testoutput14-16 b/testdata/testoutput14-16 -index 61541f6..dd1a977 100644 ---- a/testdata/testoutput14-16 -+++ b/testdata/testoutput14-16 -@@ -122,4 +122,42 @@ No match - - # ---------------------------------------------------- - -+# ---------------------------------------------------- -+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit -+# mode; for the other widths they will fail. -+ -+/k*\x{ffffffff}/caseless,ucp -+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large -+ \x{ffffffff} -+ -+/k+\x{ffffffff}/caseless,ucp,no_start_optimize -+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large -+ K\x{ffffffff} -+\= Expect no match -+ \x{ffffffff}\x{ffffffff} -+ -+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize -+Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large -+\= Expect no match -+ \x{ffffffff}\x{ffffffff}\x{ffffffff} -+ -+/k\x{ffffffff}/caseless,ucp,no_start_optimize -+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large -+ K\x{ffffffff} -+\= Expect no match -+ \x{ffffffff}\x{ffffffff}\x{ffffffff} -+ -+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess -+\= Expect no match -+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z -+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. -+** Truncation will probably give the wrong result. -+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. -+** Truncation will probably give the wrong result. -+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. -+** Truncation will probably give the wrong result. -+No match -+ -+# ---------------------------------------------------- -+ - # End of testinput14 -diff --git a/testdata/testoutput14-32 b/testdata/testoutput14-32 -index f1f65b7..dc21569 100644 ---- a/testdata/testoutput14-32 -+++ b/testdata/testoutput14-32 -@@ -122,4 +122,38 @@ No match - - # ---------------------------------------------------- - -+# ---------------------------------------------------- -+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit -+# mode; for the other widths they will fail. -+ -+/k*\x{ffffffff}/caseless,ucp -+ \x{ffffffff} -+ 0: \x{ffffffff} -+ -+/k+\x{ffffffff}/caseless,ucp,no_start_optimize -+ K\x{ffffffff} -+ 0: K\x{ffffffff} -+\= Expect no match -+ \x{ffffffff}\x{ffffffff} -+No match -+ -+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize -+\= Expect no match -+ \x{ffffffff}\x{ffffffff}\x{ffffffff} -+No match -+ -+/k\x{ffffffff}/caseless,ucp,no_start_optimize -+ K\x{ffffffff} -+ 0: K\x{ffffffff} -+\= Expect no match -+ \x{ffffffff}\x{ffffffff}\x{ffffffff} -+No match -+ -+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess -+\= Expect no match -+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z -+No match -+ -+# ---------------------------------------------------- -+ - # End of testinput14 -diff --git a/testdata/testoutput14-8 b/testdata/testoutput14-8 -index aa62414..69285db 100644 ---- a/testdata/testoutput14-8 -+++ b/testdata/testoutput14-8 -@@ -122,4 +122,42 @@ Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too - - # ---------------------------------------------------- - -+# ---------------------------------------------------- -+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit -+# mode; for the other widths they will fail. -+ -+/k*\x{ffffffff}/caseless,ucp -+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large -+ \x{ffffffff} -+ -+/k+\x{ffffffff}/caseless,ucp,no_start_optimize -+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large -+ K\x{ffffffff} -+\= Expect no match -+ \x{ffffffff}\x{ffffffff} -+ -+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize -+Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large -+\= Expect no match -+ \x{ffffffff}\x{ffffffff}\x{ffffffff} -+ -+/k\x{ffffffff}/caseless,ucp,no_start_optimize -+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large -+ K\x{ffffffff} -+\= Expect no match -+ \x{ffffffff}\x{ffffffff}\x{ffffffff} -+ -+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess -+\= Expect no match -+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z -+** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled. -+** Truncation will probably give the wrong result. -+** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled. -+** Truncation will probably give the wrong result. -+** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled. -+** Truncation will probably give the wrong result. -+No match -+ -+# ---------------------------------------------------- -+ - # End of testinput14 --- -2.33.0 - diff --git a/backport-Fix-incorrect-patch-in-c1306126.patch b/backport-Fix-incorrect-patch-in-c1306126.patch deleted file mode 100644 index fa5d2312d9f722949a7bef09578a8c27e62f9883..0000000000000000000000000000000000000000 --- a/backport-Fix-incorrect-patch-in-c1306126.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 7fe586b892c9e0cbf3b21d57cfd8135e2311e45c Mon Sep 17 00:00:00 2001 -From: Philip Hazel -Date: Mon, 20 Nov 2023 15:41:06 +0000 -Subject: [PATCH] Fix incorrect patch in c1306126 - ---- - src/pcre2_compile.c | 20 ++++++++++++++------ - testdata/testinput2 | 2 ++ - testdata/testoutput2 | 2 ++ - 3 files changed, 18 insertions(+), 6 deletions(-) - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index fdaf2ad..9e45580 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -2782,6 +2782,7 @@ uint32_t *previous_callout = NULL; - uint32_t *parsed_pattern = cb->parsed_pattern; - uint32_t *parsed_pattern_end = cb->parsed_pattern_end; - uint32_t *this_parsed_item = NULL; -+uint32_t *prev_parsed_item = NULL; - uint32_t meta_quantifier = 0; - uint32_t add_after_mark = 0; - uint32_t extra_options = cb->cx->extra_options; -@@ -2867,11 +2868,10 @@ while (ptr < ptrend) - uint32_t set, unset, *optset; - uint32_t terminator; - uint32_t prev_meta_quantifier; -- uint32_t *prev_parsed_item = this_parsed_item; - BOOL prev_okquantifier; - PCRE2_SPTR tempptr; - PCRE2_SIZE offset; -- -+ - if (parsed_pattern >= parsed_pattern_end) - { - errorcode = ERR63; /* Internal error (parsed pattern overflow) */ -@@ -2883,10 +2883,17 @@ while (ptr < ptrend) - errorcode = ERR19; - goto FAILED; /* Parentheses too deeply nested */ - } -- -- /* Remember where this item started */ - -- this_parsed_item = parsed_pattern; -+ /* If the last time round this loop something was added, parsed_pattern will -+ no longer be equal to this_parsed_item. Remember where the previous item -+ started and reset for the next item. Note that sometimes round the loop, -+ nothing gets added (e.g. for ignored white space). */ -+ -+ if (this_parsed_item != parsed_pattern) -+ { -+ prev_parsed_item = this_parsed_item; -+ this_parsed_item = parsed_pattern; -+ } - - /* Get next input character, save its position for callout handling. */ - -@@ -3440,7 +3447,8 @@ while (ptr < ptrend) - - /* ---- Quantifier post-processing ---- */ - -- /* Check that a quantifier is allowed after the previous item. */ -+ /* Check that a quantifier is allowed after the previous item. This -+ guarantees that there is a previous item. */ - - CHECK_QUANTIFIER: - if (!prev_okquantifier) -diff --git a/testdata/testinput2 b/testdata/testinput2 -index ba292d8..da845c1 100644 ---- a/testdata/testinput2 -+++ b/testdata/testinput2 -@@ -6051,4 +6051,6 @@ a)"xI - /abcd/ - abcd\=ovector=65536 - -+/A +/extended -+ - # End of testinput2 -diff --git a/testdata/testoutput2 b/testdata/testoutput2 -index 888f06a..85de4ae 100644 ---- a/testdata/testoutput2 -+++ b/testdata/testoutput2 -@@ -17932,6 +17932,8 @@ No match - abcd\=ovector=65536 - 0: abcd - -+/A +/extended -+ - # End of testinput2 - Error -70: PCRE2_ERROR_BADDATA (unknown error number) - Error -62: bad serialized data --- -2.33.0 - diff --git a/backport-Fix-incorrect-positive-error-code-from-pcre2_substitute.patch b/backport-Fix-incorrect-positive-error-code-from-pcre2_substitute.patch deleted file mode 100644 index 412ecbd29dbb23363999419dce866f3885350534..0000000000000000000000000000000000000000 --- a/backport-Fix-incorrect-positive-error-code-from-pcre2_substitute.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 829414f8e549fe7e4b1a6696ca70664e89e5e7f0 Mon Sep 17 00:00:00 2001 -From: Nicholas Wilson -Date: Wed, 18 Sep 2024 16:39:22 +0100 -Subject: [PATCH] Fix incorrect positive error code from pcre2_substitute() - (#481) - ---- - src/pcre2_substitute.c | 4 +++- - testdata/testinput2 | 6 ++++++ - testdata/testoutput2 | 10 ++++++++++ - 3 files changed, 19 insertions(+), 1 deletion(-) - -diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c -index 86c1d1e69..862ea9f73 100644 ---- a/src/pcre2_substitute.c -+++ b/src/pcre2_substitute.c -@@ -134,7 +134,9 @@ for (; ptr < ptrend; ptr++) - ptr -= 1; /* Back to last code unit of escape */ - if (errorcode != 0) - { -- rc = errorcode; -+ /* errorcode from check_escape is positive, so must not be returned by -+ pcre2_substitute(). */ -+ rc = PCRE2_ERROR_BADREPESCAPE; - goto EXIT; - } - -diff --git a/testdata/testinput2 b/testdata/testinput2 -index c2abdb890..8be78ff50 100644 ---- a/testdata/testinput2 -+++ b/testdata/testinput2 -@@ -4201,6 +4201,12 @@ - 123abc123\=substitute_overflow_length,replace=[1]x$1z - 123abc123\=substitute_overflow_length,replace=[0]x$1z - -+/a(b)c/substitute_extended -+ ZabcZ\=replace=>${1:+ yes : no } -+ ZabcZ\=replace=>${1:+ \o{100} : \o{100} } -+ ZabcZ\=replace=>${1:+ \o{Z} : no } -+ ZabcZ\=replace=>${1:+ yes : \o{Z} } -+ - "((?=(?(?=(?(?=(?(?=()))))))))" - a - -diff --git a/testdata/testoutput2 b/testdata/testoutput2 -index 7a582cd23..ccf209b5c 100644 ---- a/testdata/testoutput2 -+++ b/testdata/testoutput2 -@@ -13818,6 +13818,16 @@ Failed: error -48: no more memory: 10 code units are needed - 123abc123\=substitute_overflow_length,replace=[0]x$1z - Failed: error -48: no more memory: 10 code units are needed - -+/a(b)c/substitute_extended -+ ZabcZ\=replace=>${1:+ yes : no } -+ 1: Z> yes Z -+ ZabcZ\=replace=>${1:+ \o{100} : \o{100} } -+ 1: Z> @ Z -+ ZabcZ\=replace=>${1:+ \o{Z} : no } -+Failed: error -57 at offset 9 in replacement: bad escape sequence in replacement string -+ ZabcZ\=replace=>${1:+ yes : \o{Z} } -+Failed: error -57 at offset 15 in replacement: bad escape sequence in replacement string -+ - "((?=(?(?=(?(?=(?(?=()))))))))" - a - 0: diff --git a/backport-Fix-non-recognition-of-some-octal-escapes-in-substitute.patch b/backport-Fix-non-recognition-of-some-octal-escapes-in-substitute.patch deleted file mode 100644 index 1fc4266df66e833580ab7308ea8f0cb4a3cabc4a..0000000000000000000000000000000000000000 --- a/backport-Fix-non-recognition-of-some-octal-escapes-in-substitute.patch +++ /dev/null @@ -1,263 +0,0 @@ -From d29e729000a3724e2aebaa64318dfd7530a55370 Mon Sep 17 00:00:00 2001 -From: Philip Hazel -Date: Wed, 4 Sep 2024 16:18:35 +0100 -Subject: [PATCH] Fix non-recognition of some octal escapes in substitute - replacement strings - ---- - src/pcre2_compile.c | 15 ++++++++------- - src/pcre2_substitute.c | 4 ++-- - testdata/testinput11 | 6 ++++++ - testdata/testinput2 | 12 ++++++++++++ - testdata/testinput5 | 3 +++ - testdata/testinput9 | 8 ++++++++ - testdata/testoutput11-16 | 8 ++++++++ - testdata/testoutput11-32 | 8 ++++++++ - testdata/testoutput2 | 16 ++++++++++++++++ - testdata/testoutput5 | 4 ++++ - testdata/testoutput9 | 10 ++++++++++ - 11 files changed, 85 insertions(+), 9 deletions(-) - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index ad2baf8..80a1a48 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -1480,8 +1480,8 @@ final code unit of the escape sequence. - This function is also called from pcre2_substitute() to handle escape sequences - in replacement strings. In this case, the cb argument is NULL, and in the case - of escapes that have further processing, only sequences that define a data --character are recognised. The isclass argument is not relevant; the options --argument is the final value of the compiled pattern's options. -+character are recognised. The options argument is the final value of the -+compiled pattern's options. - - Arguments: - ptrptr points to the input position pointer -@@ -1496,7 +1496,7 @@ Arguments: - errorcodeptr points to the errorcode variable (containing zero) - options the current options bits - xoptions the current extra options bits -- isclass TRUE if inside a character class -+ isclassorsub TRUE if in a character class or called from pcre2_substitute() - cb compile data block or NULL when called from pcre2_substitute() - - Returns: zero => a data character -@@ -1507,7 +1507,7 @@ Returns: zero => a data character - - int - PRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr, -- int *errorcodeptr, uint32_t options, uint32_t xoptions, BOOL isclass, -+ int *errorcodeptr, uint32_t options, uint32_t xoptions, BOOL isclassorsub, - compile_block *cb) - { - BOOL utf = (options & PCRE2_UTF) != 0; -@@ -1607,7 +1607,8 @@ else - - if (cb == NULL) - { -- if (c != CHAR_c && c != CHAR_o && c != CHAR_x) -+ if (c < CHAR_0 || -+ (c > CHAR_9 && (c != CHAR_c && c != CHAR_o && c != CHAR_x))) - { - *errorcodeptr = ERR3; - return 0; -@@ -1719,7 +1720,7 @@ else - */ - - case CHAR_g: -- if (isclass) break; -+ if (isclassorsub) break; - - if (ptr >= ptrend) - { -@@ -1791,7 +1792,7 @@ else - case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5: - case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9: - -- if (!isclass) -+ if (!isclassorsub) - { - oldptr = ptr; - ptr--; /* Back to the digit */ -diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c -index d1f17eb05..1ccef0660 100644 ---- a/src/pcre2_substitute.c -+++ b/src/pcre2_substitute.c -@@ -130,7 +130,7 @@ for (; ptr < ptrend; ptr++) - - ptr += 1; /* Must point after \ */ - erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode, -- code->overall_options, code->extra_options, FALSE, NULL); -+ code->overall_options, code->extra_options, TRUE, NULL); - ptr -= 1; /* Back to last code unit of escape */ - if (errorcode != 0) - { -@@ -858,7 +858,7 @@ do - - ptr++; /* Point after \ */ - rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode, -- code->overall_options, code->extra_options, FALSE, NULL); -+ code->overall_options, code->extra_options, TRUE, NULL); - if (errorcode != 0) goto BADESCAPE; - - switch(rc) -diff --git a/testdata/testinput11 b/testdata/testinput11 -index 2bc8a25e3..69aea351b 100644 ---- a/testdata/testinput11 -+++ b/testdata/testinput11 -@@ -371,4 +371,10 @@ - /(?i:A{1,}\6666666666)/ - A\x{1b6}6666666 - -+/abc/substitute_extended,replace=>\777< -+ abc -+ -+/abc/substitute_extended,replace=>\o{012345}< -+ abc -+ - # End of testinput11 -diff --git a/testdata/testinput2 b/testdata/testinput2 -index 7d8dfc149..51e2095c8 100644 ---- a/testdata/testinput2 -+++ b/testdata/testinput2 -@@ -4668,6 +4668,18 @@ B)x/alt_verbnames,mark - /abcd/g - >abcd1234abcd5678<\=replace=wxyz,substitute_matched - -+/abc/substitute_extended,replace=>\045< -+ abc -+ -+/abc/substitute_extended,replace=>\45< -+ abc -+ -+/abc/substitute_extended,replace=>\o{45}< -+ abc -+ -+/abc/substitute_extended,replace=>\845< -+ abc -+ - /^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I - - /((p(?'K/ -diff --git a/testdata/testinput5 b/testdata/testinput5 -index 9126236..da2830d 100644 ---- a/testdata/testinput5 -+++ b/testdata/testinput5 -@@ -2442,4 +2442,7 @@ - - # End PCRE2_EXTRA_ASCII_xxx tests - -+/abc/utf,substitute_extended,replace=>\777< -+ abc -+ - # End of testinput5 -diff --git a/testdata/testinput9 b/testdata/testinput9 -index 4eb228afe..f2f50033f 100644 ---- a/testdata/testinput9 -+++ b/testdata/testinput9 -@@ -263,4 +263,12 @@ - /(?i:A{1,}\6666666666)/ - A\x{1b6}6666666 - -+# Should cause an error -+/abc/substitute_extended,replace=>\777< -+ abc -+ -+# Should cause an error -+/abc/substitute_extended,replace=>\o{012345}< -+ abc -+ - # End of testinput9 -diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16 -index f70d89ee9..806f6b3e0 100644 ---- a/testdata/testoutput11-16 -+++ b/testdata/testoutput11-16 -@@ -665,4 +665,12 @@ Subject length lower bound = 1 - A\x{1b6}6666666 - 0: A\x{1b6}6666666 - -+/abc/substitute_extended,replace=>\777< -+ abc -+ 1: >\x{1ff}< -+ -+/abc/substitute_extended,replace=>\o{012345}< -+ abc -+ 1: >\x{14e5}< -+ - # End of testinput11 -diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32 -index 961c4cd05..c5f5c8a42 100644 ---- a/testdata/testoutput11-32 -+++ b/testdata/testoutput11-32 -@@ -671,4 +671,12 @@ Subject length lower bound = 1 - A\x{1b6}6666666 - 0: A\x{1b6}6666666 - -+/abc/substitute_extended,replace=>\777< -+ abc -+ 1: >\x{1ff}< -+ -+/abc/substitute_extended,replace=>\o{012345}< -+ abc -+ 1: >\x{14e5}< -+ - # End of testinput11 -diff --git a/testdata/testoutput2 b/testdata/testoutput2 -index 1cffe6a36..eeb635d6d 100644 ---- a/testdata/testoutput2 -+++ b/testdata/testoutput2 -@@ -14934,6 +14934,22 @@ Failed: error -55 at offset 3 in replacement: requested value is not set - >abcd1234abcd5678<\=replace=wxyz,substitute_matched - 2: >wxyz1234wxyz5678< - -+/abc/substitute_extended,replace=>\045< -+ abc -+ 1: >%< -+ -+/abc/substitute_extended,replace=>\45< -+ abc -+ 1: >%< -+ -+/abc/substitute_extended,replace=>\o{45}< -+ abc -+ 1: >%< -+ -+/abc/substitute_extended,replace=>\845< -+ abc -+ 1: >845< -+ - /^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I - Capture group count = 2 - Max back reference = 1 -diff --git a/testdata/testoutput5 b/testdata/testoutput5 -index b1842df..24d849c 100644 ---- a/testdata/testoutput5 -+++ b/testdata/testoutput5 -@@ -5375,4 +5375,8 @@ No match - - # End PCRE2_EXTRA_ASCII_xxx tests - -+/abc/utf,substitute_extended,replace=>\777< -+ abc -+ 1: >\x{1ff}< -+ - # End of testinput5 -diff --git a/testdata/testoutput9 b/testdata/testoutput9 -index 3613703e0..8556c9e14 100644 ---- a/testdata/testoutput9 -+++ b/testdata/testoutput9 -@@ -371,4 +371,14 @@ Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), - Failed: error 151 at offset 13: octal value is greater than \377 in 8-bit non-UTF-8 mode - A\x{1b6}6666666 - -+# Should cause an error -+/abc/substitute_extended,replace=>\777< -+ abc -+Failed: error -57 at offset 5 in replacement: bad escape sequence in replacement string -+ -+# Should cause an error -+/abc/substitute_extended,replace=>\o{012345}< -+ abc -+Failed: error -57 at offset 10 in replacement: bad escape sequence in replacement string -+ - # End of testinput9 diff --git a/backport-Fix-oversight-in-DFA-when-changing-OP_REVERSE-also-a.patch b/backport-Fix-oversight-in-DFA-when-changing-OP_REVERSE-also-a.patch deleted file mode 100644 index 0e866e7a6a373d97a5a5b68aa2498434ac82ee12..0000000000000000000000000000000000000000 --- a/backport-Fix-oversight-in-DFA-when-changing-OP_REVERSE-also-a.patch +++ /dev/null @@ -1,28 +0,0 @@ -From b88126f42382fa470b6480f82489303d4311ce18 Mon Sep 17 00:00:00 2001 -From: Philip Hazel -Date: Thu, 16 Nov 2023 13:49:49 +0000 -Subject: [PATCH] Fix oversight in DFA when changing OP_REVERSE; also add some - unrelated tests - -Conflict:don't add unrelated tests - ---- - src/pcre2_dfa_match.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c -index e90c984..5768407 100644 ---- a/src/pcre2_dfa_match.c -+++ b/src/pcre2_dfa_match.c -@@ -591,7 +591,7 @@ if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT) - end_code = this_start_code; - do - { -- size_t back = (size_t)GET(end_code, 2+LINK_SIZE); -+ size_t back = (size_t)GET2(end_code, 2+LINK_SIZE); - if (back > max_back) max_back = back; - end_code += GET(end_code, 1); - } --- -2.33.0 - diff --git a/backport-Fix-oversight-in-adding-new-pcre2grep-test.patch b/backport-Fix-oversight-in-adding-new-pcre2grep-test.patch deleted file mode 100644 index fdc62f2cb59ed609def0a7f59d9e48fdf89ebe3a..0000000000000000000000000000000000000000 --- a/backport-Fix-oversight-in-adding-new-pcre2grep-test.patch +++ /dev/null @@ -1,49 +0,0 @@ -From ace78dc460e7e80592d86216cfdd36a62b083bb3 Mon Sep 17 00:00:00 2001 -From: Philip Hazel -Date: Wed, 27 Nov 2024 15:50:34 +0000 -Subject: [PATCH] Fix oversight in adding new pcre2grep test - -Conflict:NA -Reference:https://github.com/PCRE2Project/pcre2/commit/ace78dc460e7e80592d86216cfdd36a62b083bb3 - ---- - testdata/grepinput | 2 +- - testdata/grepoutput | 3 +-- - 2 files changed, 2 insertions(+), 3 deletions(-) - -diff --git a/testdata/grepinput b/testdata/grepinput -index 91d3db88..1a0a9c0f 100644 ---- a/testdata/grepinput -+++ b/testdata/grepinput -@@ -630,7 +630,7 @@ asd - dfg - ghj - jkl --abc -+abx - def - ghi - xyz -diff --git a/testdata/grepoutput b/testdata/grepoutput -index 58ea858d..abfabe15 100644 ---- a/testdata/grepoutput -+++ b/testdata/grepoutput -@@ -104,7 +104,6 @@ pcre2grep: Error in command-line regex at offset 4: quantifier does not follow a - RC=2 - ---------------------------- Test 16 ----------------------------- - pcre2grep: Failed to open ./testdata/nonexistfile: No such file or directory --./testdata/grepinput:abc - RC=2 - ---------------------------- Test 17 ----------------------------- - features should be added at the end, because some of the tests involve the -@@ -1306,7 +1305,7 @@ RC=0 - 630-dfg - 631-ghj - 632:jkl --633-abc -+633-abx - 634-def - 635-ghi - RC=0 --- -2.23.0 \ No newline at end of file diff --git a/backport-Fix-the-lookahead-after-d-or-posix-to-skip-whitespac.patch b/backport-Fix-the-lookahead-after-d-or-posix-to-skip-whitespac.patch deleted file mode 100644 index b11db588823d0349947abbc9f1123fbb6ba5798e..0000000000000000000000000000000000000000 --- a/backport-Fix-the-lookahead-after-d-or-posix-to-skip-whitespac.patch +++ /dev/null @@ -1,318 +0,0 @@ -From 16d7edb56757e5294eeeecc9a19135aab89a50ba Mon Sep 17 00:00:00 2001 -From: Nicholas Wilson -Date: Fri, 1 Nov 2024 17:13:34 +0000 -Subject: [PATCH] Fix the lookahead after [\d or [[:posix] to skip whitespace - (#544) - -Conflict:don't modify alt_extended_class because fc38d9e784 is not merged; -don't modify class_op_state because class_op_state is not merged; adapt context -Reference:https://github.com/PCRE2Project/pcre2/commit/16d7edb56757e5294eeeecc9a19135aab89a50ba - ---- - src/pcre2_compile.c | 88 +++++++++++++++++++++++++++--------------- - src/pcre2_intmodedep.h | 2 +- - testdata/testinput1 | 20 +++++++--- - testdata/testinput2 | 8 ++++ - testdata/testoutput1 | 30 ++++++++++---- - testdata/testoutput2 | 12 ++++++ - 6 files changed, 113 insertions(+), 47 deletions(-) - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index 2493c871..9be26b07 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -2681,7 +2681,14 @@ the main compiling phase. */ - /* States used for analyzing ranges in character classes. The two OK values - must be last. */ - --enum { RANGE_NO, RANGE_STARTED, RANGE_OK_ESCAPED, RANGE_OK_LITERAL }; -+enum { -+ RANGE_NO, /* State after '[' (initial), or '[a-z'; hyphen is literal */ -+ RANGE_STARTED, /* State after '[1-'; last-emitted code is META_RANGE_XYZ */ -+ RANGE_FORBID_NO, /* State after '[\d'; '-]' is allowed but not '-1]' */ -+ RANGE_FORBID_STARTED, /* State after '[\d-'*/ -+ RANGE_OK_ESCAPED, /* State after '[1'; hyphen may be a range */ -+ RANGE_OK_LITERAL /* State after '[\1'; hyphen may be a range */ -+}; - - /* Only in 32-bit mode can there be literals > META_END. A macro encapsulates - the storing of literal values in the main parsed pattern, where they can always -@@ -2734,6 +2741,7 @@ PCRE2_SPTR thisptr; - PCRE2_SPTR name; - PCRE2_SPTR ptrend = cb->end_pattern; - PCRE2_SPTR verbnamestart = NULL; /* Value avoids compiler warning */ -+PCRE2_SPTR class_range_forbid_ptr = NULL; - named_group *ng; - nest_save *top_nest, *end_nests; - -@@ -3559,6 +3567,21 @@ while (ptr < ptrend) - goto FAILED; - } - -+ /* Perl treats a hyphen after a POSIX class as a literal, not the -+ start of a range. However, it gives a warning in its warning mode -+ unless the hyphen is the last character in the class. PCRE does not -+ have a warning mode, so we give an error, because this is likely an -+ error on the user's part. -+ -+ Roll back to the hyphen for the error position. */ -+ -+ if (class_range_state == RANGE_FORBID_STARTED) -+ { -+ ptr = class_range_forbid_ptr; -+ errorcode = ERR50; -+ goto FAILED; -+ } -+ - if (*ptr != CHAR_COLON) - { - errorcode = ERR13; -@@ -3579,26 +3602,12 @@ while (ptr < ptrend) - } - ptr = tempptr + 2; - -- /* Perl treats a hyphen after a POSIX class as a literal, not the -- start of a range. However, it gives a warning in its warning mode -- unless the hyphen is the last character in the class. PCRE does not -- have a warning mode, so we give an error, because this is likely an -- error on the user's part. */ -- -- if (ptr < ptrend - 1 && *ptr == CHAR_MINUS && -- ptr[1] != CHAR_RIGHT_SQUARE_BRACKET) -- { -- errorcode = ERR50; -- goto FAILED; -- } -- -- /* Set "a hyphen is not the start of a range" for the -] case, and also -- in case the POSIX class is followed by \E or \Q\E (possibly repeated - -- fuzzers do that kind of thing) and *then* a hyphen. This causes that -- hyphen to be treated as a literal. I don't think it's worth setting up -- special apparatus to do otherwise. */ -+ /* Set "a hyphen is forbidden to be the start of a range". For the '-]' -+ case, the hyphen is treated as a literal, but for '-1' it is disallowed -+ (because it would be interpreted as range). */ - -- class_range_state = RANGE_NO; -+ class_range_state = RANGE_FORBID_NO; -+ class_range_forbid_ptr = ptr; - - /* When PCRE2_UCP is set, unless PCRE2_EXTRA_ASCII_POSIX is set, some - of the POSIX classes are converted to use Unicode properties \p or \P -@@ -3648,6 +3657,14 @@ while (ptr < ptrend) - class_range_state = RANGE_STARTED; - } - -+ /* Handle forbidden start of range */ -+ -+ else if (c == CHAR_MINUS && class_range_state == RANGE_FORBID_NO) -+ { -+ *parsed_pattern++ = CHAR_MINUS; -+ class_range_state = RANGE_FORBID_STARTED; -+ } -+ - /* Handle a literal character */ - - else if (c != CHAR_BACKSLASH) -@@ -3670,6 +3687,12 @@ while (ptr < ptrend) - } - class_range_state = RANGE_NO; - } -+ else if (class_range_state == RANGE_FORBID_STARTED) -+ { -+ ptr = class_range_forbid_ptr; -+ errorcode = ERR50; -+ goto FAILED; -+ } - else /* Potential start of range */ - { - class_range_state = char_is_literal? -@@ -3733,13 +3756,23 @@ while (ptr < ptrend) - if (class_range_state == RANGE_STARTED) - { - errorcode = ERR50; -- goto FAILED; /* Not CLASS_ESCAPE_FAILED; always an error */ -+ goto FAILED; -+ } -+ /* Perl gives a warning unless the hyphen following a multi-character -+ escape is the last character in the class. PCRE throws an error. */ -+ if (class_range_state == RANGE_FORBID_STARTED) -+ { -+ ptr = class_range_forbid_ptr; -+ errorcode = ERR50; -+ goto FAILED; - } - - /* Of the remaining escapes, only those that define characters are - allowed in a class. None may start a range. */ - -- class_range_state = RANGE_NO; -+ class_range_state = RANGE_FORBID_NO; -+ class_range_forbid_ptr = ptr; -+ - switch(escape) - { - case ESC_N: -@@ -3779,6 +3812,7 @@ while (ptr < ptrend) - if (negated) escape = (escape == ESC_P)? ESC_p : ESC_P; - *parsed_pattern++ = META_ESCAPE + escape; - *parsed_pattern++ = (ptype << 16) | pdata; -+ class_range_forbid_ptr = ptr; - } - #else - errorcode = ERR45; -@@ -3791,16 +3825,6 @@ while (ptr < ptrend) - ptr--; - goto FAILED; - } -- -- /* Perl gives a warning unless a following hyphen is the last character -- in the class. PCRE throws an error. */ -- -- if (ptr < ptrend - 1 && *ptr == CHAR_MINUS && -- ptr[1] != CHAR_RIGHT_SQUARE_BRACKET) -- { -- errorcode = ERR50; -- goto FAILED; -- } - } - - /* Proceed to next thing in the class. */ -diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h -index 598060c9..a11b4faa 100644 ---- a/src/pcre2_intmodedep.h -+++ b/src/pcre2_intmodedep.h -@@ -435,7 +435,7 @@ UTF-16 mode. */ - c = *eptr; \ - if ((c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len); - --/* Get the next UTF-816character, testing for UTF-16 mode, not advancing the -+/* Get the next UTF-16 character, testing for UTF-16 mode, not advancing the - pointer, incrementing length if there is a low surrogate. This is called when - we do not know if we are in UTF-16 mode. */ - -diff --git a/testdata/testinput1 b/testdata/testinput1 -index 0794502e..1e50369f 100644 ---- a/testdata/testinput1 -+++ b/testdata/testinput1 -@@ -5787,12 +5787,6 @@ ef) x/x,mark - - /(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/ - --/[s[:digit:]\E-H]+/ -- s09-H -- --/[s[:digit:]\Q\E-H]+/ -- s09-H -- - /a+(?:|b)a/ - aaaa - -@@ -6435,4 +6429,18 @@ ef) x/x,mark - /(a\K.(?1)*)/ - abac - -+/[[:digit:]- ]/xx -+ 1 -+ - -+\= Expect no match -+ z -+ \ \ -+ -+/[\d- ]/xx -+ 1 -+ - -+\= Expect no match -+ z -+ \ \ -+ - # End of testinput1 -diff --git a/testdata/testinput2 b/testdata/testinput2 -index b6464a0b..61b94e69 100644 ---- a/testdata/testinput2 -+++ b/testdata/testinput2 -@@ -5981,4 +5981,12 @@ a)"xI - a - a\=noteol - -+/[[:digit:] -Z]/xx -+ -+/[\d -Z]/xx -+ -+/[[:digit:]\E-H]/ -+ -+/[[:digit:]\Q\E-H]+/ -+ - # End of testinput2 -diff --git a/testdata/testoutput1 b/testdata/testoutput1 -index 8daf8362..6f927729 100644 ---- a/testdata/testoutput1 -+++ b/testdata/testoutput1 -@@ -9246,14 +9246,6 @@ No match - - /(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/ - --/[s[:digit:]\E-H]+/ -- s09-H -- 0: s09-H -- --/[s[:digit:]\Q\E-H]+/ -- s09-H -- 0: s09-H -- - /a+(?:|b)a/ - aaaa - 0: aaaa -@@ -10197,4 +10189,26 @@ No match - 0: c - 1: abac - -+/[[:digit:]- ]/xx -+ 1 -+ 0: 1 -+ - -+ 0: - -+\= Expect no match -+ z -+No match -+ \ \ -+No match -+ -+/[\d- ]/xx -+ 1 -+ 0: 1 -+ - -+ 0: - -+\= Expect no match -+ z -+No match -+ \ \ -+No match -+ - # End of testinput1 -diff --git a/testdata/testoutput2 b/testdata/testoutput2 -index 1075b4d4..86bfe964 100644 ---- a/testdata/testoutput2 -+++ b/testdata/testoutput2 -@@ -17815,6 +17815,18 @@ Subject length lower bound = 2 - a\=noteol - 0: a - -+/[[:digit:] -Z]/xx -+Failed: error 150 at offset 10: invalid range in character class -+ -+/[\d -Z]/xx -+Failed: error 150 at offset 3: invalid range in character class -+ -+/[[:digit:]\E-H]/ -+Failed: error 150 at offset 10: invalid range in character class -+ -+/[[:digit:]\Q\E-H]+/ -+Failed: error 150 at offset 10: invalid range in character class -+ - # End of testinput2 - Error -70: PCRE2_ERROR_BADDATA (unknown error number) - Error -62: bad serialized data --- -2.33.0 - diff --git a/backport-Fix-z-behaviour-when-matching-within-invalid-UTF.patch b/backport-Fix-z-behaviour-when-matching-within-invalid-UTF.patch deleted file mode 100644 index 2ac32d285480417da219c3d6992beeb8956f54f4..0000000000000000000000000000000000000000 --- a/backport-Fix-z-behaviour-when-matching-within-invalid-UTF.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 05206d66340341bef7a673108a855f594c148950 Mon Sep 17 00:00:00 2001 -From: Philip Hazel -Date: Sun, 19 Nov 2023 18:32:10 +0000 -Subject: [PATCH] Fix \z behaviour when matching within invalid UTF - ---- - src/pcre2_match.c | 6 ++++-- - testdata/testinput10 | 3 +++ - testdata/testinput12 | 3 +++ - testdata/testoutput10 | 4 ++++ - testdata/testoutput12-16 | 4 ++++ - testdata/testoutput12-32 | 4 ++++ - 6 files changed, 22 insertions(+), 2 deletions(-) - -diff --git a/src/pcre2_match.c b/src/pcre2_match.c -index 2dcf8c4..ea03976 100644 ---- a/src/pcre2_match.c -+++ b/src/pcre2_match.c -@@ -6076,10 +6076,12 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, - if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS; - - /* Fall through */ -- /* Unconditional end of subject assertion (\z) */ -+ /* Unconditional end of subject assertion (\z). We must check NOTEOL -+ because it gets set for invalid UTF fragments. */ - - case OP_EOD: -- if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH); -+ if (Feptr < mb->end_subject || (mb->moptions & PCRE2_NOTEOL) != 0) -+ RRETURN(MATCH_NOMATCH); - if (mb->partial != 0) - { - mb->hitend = TRUE; -diff --git a/testdata/testinput10 b/testdata/testinput10 -index c7618b1..e901d51 100644 ---- a/testdata/testinput10 -+++ b/testdata/testinput10 -@@ -642,4 +642,7 @@ - qchq\=ph - qchq\=ps - -+/A\z/utf,match_invalid_utf -+ A\x80\x42\n -+ - # End of testinput10 -diff --git a/testdata/testinput12 b/testdata/testinput12 -index 1e552e6..5a2d8d2 100644 ---- a/testdata/testinput12 -+++ b/testdata/testinput12 -@@ -464,6 +464,9 @@ - - /aa/utf,ucp,match_invalid_utf,global - \x{d800}aa -+ -+/A\z/utf,match_invalid_utf -+ A\x{df00}\n - - # ---------------------------------------------------- - -diff --git a/testdata/testoutput10 b/testdata/testoutput10 -index 18dd9d2..8145891 100644 ---- a/testdata/testoutput10 -+++ b/testdata/testoutput10 -@@ -1921,4 +1921,8 @@ Partial match: - qchq\=ps - Partial match: - -+/A\z/utf,match_invalid_utf -+ A\x80\x42\n -+No match -+ - # End of testinput10 -diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 -index 8cbc13d..9ac403e 100644 ---- a/testdata/testoutput12-16 -+++ b/testdata/testoutput12-16 -@@ -1607,6 +1607,10 @@ No match - /aa/utf,ucp,match_invalid_utf,global - \x{d800}aa - 0: aa -+ -+/A\z/utf,match_invalid_utf -+ A\x{df00}\n -+No match - - # ---------------------------------------------------- - -diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 -index 1a98b4b..9396305 100644 ---- a/testdata/testoutput12-32 -+++ b/testdata/testoutput12-32 -@@ -1605,6 +1605,10 @@ No match - /aa/utf,ucp,match_invalid_utf,global - \x{d800}aa - 0: aa -+ -+/A\z/utf,match_invalid_utf -+ A\x{df00}\n -+No match - - # ---------------------------------------------------- - --- -2.33.0 - diff --git a/backport-Fixing-an-issue-using-empty-character-sets-in-jit.patch b/backport-Fixing-an-issue-using-empty-character-sets-in-jit.patch deleted file mode 100644 index 7f72bf39ae646b4c43d6183fe2feda8d5d1488dd..0000000000000000000000000000000000000000 --- a/backport-Fixing-an-issue-using-empty-character-sets-in-jit.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 994536c96fa571bcfd9232001e73b78c6afb9e67 Mon Sep 17 00:00:00 2001 -From: Zoltan Herczeg -Date: Thu, 21 Mar 2024 07:33:17 +0000 -Subject: [PATCH] Fixing an issue using empty character sets in jit - -Conflict:adapt context -Reference:https://github.com/PCRE2Project/pcre2/commit/994536c96fa571bcfd9232001e73b78c6afb9e67 - ---- - src/pcre2_jit_compile.c | 23 ++++++++++++++++------- - src/pcre2_jit_test.c | 1 + - 2 files changed, 17 insertions(+), 7 deletions(-) - -diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c -index afff36a..c19723b 100644 ---- a/src/pcre2_jit_compile.c -+++ b/src/pcre2_jit_compile.c -@@ -6186,25 +6186,34 @@ if (max < 1) - /* Convert last_count to priority. */ - for (i = 0; i < max; i++) - { -- SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count); -+ SLJIT_ASSERT(chars[i].last_count <= chars[i].count); - -- if (chars[i].count == 1) -+ switch (chars[i].count) - { -+ case 0: -+ chars[i].count = 255; -+ chars[i].last_count = 0; -+ break; -+ -+ case 1: - chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5; - /* Simplifies algorithms later. */ - chars[i].chars[1] = chars[i].chars[0]; -- } -- else if (chars[i].count == 2) -- { -+ break; -+ -+ case 2: - SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]); - - if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1])) - chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4; - else - chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2; -- } -- else -+ break; -+ -+ default: - chars[i].last_count = (chars[i].count == 255) ? 0 : 1; -+ break; -+ } - } - - #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD -diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c -index f051bd2..6d95bb9 100644 ---- a/src/pcre2_jit_test.c -+++ b/src/pcre2_jit_test.c -@@ -396,6 +396,7 @@ static struct regression_test_case regression_test_cases[] = { - { MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" }, - { MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" }, - { CMU, A, 0, 0 | F_NOMATCH, "^[\\x{0100}-\\x{017f}]", " " }, -+ { M, A, 0, 0 | F_NOMATCH, "[^\\S\\W]{6}", "abcdefghijk" }, - - /* Unicode properties. */ - { MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" }, --- -2.23.0 - diff --git a/backport-Further-ASCII-tests-and-minor-bugfix-plus-ChangeLog-.patch b/backport-Further-ASCII-tests-and-minor-bugfix-plus-ChangeLog-.patch deleted file mode 100644 index b4847fb3a48a2e3342c59195597aae7c89438525..0000000000000000000000000000000000000000 --- a/backport-Further-ASCII-tests-and-minor-bugfix-plus-ChangeLog-.patch +++ /dev/null @@ -1,104 +0,0 @@ -From fc56fd790c1a3ba8f2890fc2b6afba21250923de Mon Sep 17 00:00:00 2001 -From: Philip Hazel -Date: Thu, 2 Feb 2023 17:19:45 +0000 -Subject: [PATCH] Further ASCII tests and minor bugfix plus ChangeLog update - -Conflict:don't modify ChangeLog -Reference:https://github.com/PCRE2Project/pcre2/commit/fc56fd790c1a3ba8f2890fc2b6afba21250923de - ---- - src/pcre2_compile.c | 5 ++--- - testdata/testinput5 | 5 +++++ - testdata/testinput7 | 5 +++++ - testdata/testoutput5 | 7 +++++++ - testdata/testoutput7 | 7 +++++++ - 5 files changed, 26 insertions(+), 3 deletions(-) - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index b8a9e098..64a35bda 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -2660,10 +2660,9 @@ the main compiling phase. */ - PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \ - PCRE2_UNGREEDY) - --#define PARSE_TRACKED_EXTRA_OPTIONS (PCRE2_EXTRA_CASELESS_RESTRICT) -- - #define PARSE_TRACKED_EXTRA_OPTIONS (PCRE2_EXTRA_CASELESS_RESTRICT| \ -- PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW) -+ PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW| \ -+ PCRE2_EXTRA_ASCII_POSIX) - - /* States used for analyzing ranges in character classes. The two OK values - must be last. */ -diff --git a/testdata/testinput5 b/testdata/testinput5 -index 6e186cf0..49b46f82 100644 ---- a/testdata/testinput5 -+++ b/testdata/testinput5 -@@ -2434,6 +2434,11 @@ - /(?aP)[[:alnum:]\d]+/i,ucp,utf - abc\x{660}xyz - -+/(*UCP)(*UTF)[[:alnum:]](?aP:[[:alnum:]])[[:alnum:]]/ -+ \x{660}A\x{660} -+\= Expect no match -+ \x{660}\x{660}\x{660} -+ - # VARIOUS - - /[\d\s\w]+/a,ucp,utf -diff --git a/testdata/testinput7 b/testdata/testinput7 -index 64a37ad2..a2b7fb8d 100644 ---- a/testdata/testinput7 -+++ b/testdata/testinput7 -@@ -2453,6 +2453,11 @@ - /(?aP)[[:alnum:]\d]+/i,ucp,utf - abc\x{660}xyz - -+/(*UCP)(*UTF)[[:alnum:]](?aP:[[:alnum:]])[[:alnum:]]/ -+ \x{660}A\x{660} -+\= Expect no match -+ \x{660}\x{660}\x{660} -+ - # VARIOUS - - /[\d\s\w]+/a,ucp,utf -diff --git a/testdata/testoutput5 b/testdata/testoutput5 -index 26972f70..4f845c84 100644 ---- a/testdata/testoutput5 -+++ b/testdata/testoutput5 -@@ -5365,6 +5365,13 @@ No match - abc\x{660}xyz - 0: abc\x{660}xyz - -+/(*UCP)(*UTF)[[:alnum:]](?aP:[[:alnum:]])[[:alnum:]]/ -+ \x{660}A\x{660} -+ 0: \x{660}A\x{660} -+\= Expect no match -+ \x{660}\x{660}\x{660} -+No match -+ - # VARIOUS - - /[\d\s\w]+/a,ucp,utf -diff --git a/testdata/testoutput7 b/testdata/testoutput7 -index c830748c..4065981d 100644 ---- a/testdata/testoutput7 -+++ b/testdata/testoutput7 -@@ -4105,6 +4105,13 @@ No match - abc\x{660}xyz - 0: abc\x{660}xyz - -+/(*UCP)(*UTF)[[:alnum:]](?aP:[[:alnum:]])[[:alnum:]]/ -+ \x{660}A\x{660} -+ 0: \x{660}A\x{660} -+\= Expect no match -+ \x{660}\x{660}\x{660} -+No match -+ - # VARIOUS - - /[\d\s\w]+/a,ucp,utf --- -2.33.0 - diff --git a/backport-Guard-against-out-of-bounds-memory-access-when-parsing.patch b/backport-Guard-against-out-of-bounds-memory-access-when-parsing.patch deleted file mode 100644 index d30309e16bbac2a11b7fb2a0a20d7153aa9c7cca..0000000000000000000000000000000000000000 --- a/backport-Guard-against-out-of-bounds-memory-access-when-parsing.patch +++ /dev/null @@ -1,233 +0,0 @@ -From ef218fbba60bfe5b0a8ac9ea4445eac5fb0847e5 Mon Sep 17 00:00:00 2001 -From: Alex Dowad -Date: Sat, 7 Sep 2024 00:16:03 +0900 -Subject: [PATCH] Guard against out-of-bounds memory access when parsing - LIMIT_HEAP et al (#463) - -Patterns passed to pcre2_compile are not guaranteed to be -null-terminated. Also, it can happen that there is an invalid -pattern like this: - - (*LIMIT_HEAP=123 - -If the next byte of memory after the end of the pattern happens -to be a digit, it will be parsed as part of the limit value. Or, -if the next byte is a right parenthesis character, it will be taken -as the end of the (*LIMIT_HEAP=nnn) construct. - -This will result in `skipatstart` being larger than `patlen`, which -will result in underflow and an erroneous call to malloc requesting -a huge number of bytes. ---- - src/pcre2_compile.c | 7 ++- - src/pcre2_internal.h | 3 + - src/pcre2_util.h | 132 ++++++++++++++++++++++++++++++++++++++++++ - testdata/testoutput15 | 4 +- - 4 files changed, 141 insertions(+), 5 deletions(-) - create mode 100644 src/pcre2_util.h - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index e6843bb13..410f220b3 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -10552,12 +10552,12 @@ if ((options & PCRE2_LITERAL) == 0) - ptr += pp; - goto HAD_EARLY_ERROR; - } -- while (IS_DIGIT(ptr[pp])) -+ while (pp < patlen && IS_DIGIT(ptr[pp])) - { - if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */ - c = c*10 + (ptr[pp++] - CHAR_0); - } -- if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS) -+ if (pp >= patlen || ptr[pp] != CHAR_RIGHT_PARENTHESIS) - { - errorcode = ERR60; - ptr += pp; -@@ -10566,7 +10566,7 @@ if ((options & PCRE2_LITERAL) == 0) - if (p->type == PSO_LIMH) limit_heap = c; - else if (p->type == PSO_LIMM) limit_match = c; - else limit_depth = c; -- skipatstart += pp - skipatstart; -+ skipatstart = ++pp; - break; - } - break; /* Out of the table scan loop */ -@@ -10574,6 +10574,7 @@ if ((options & PCRE2_LITERAL) == 0) - } - if (i >= sizeof(pso_list)/sizeof(pso)) break; /* Out of pso loop */ - } -+ PCRE2_ASSERT(skipatstart <= patlen); - } - - /* End of pattern-start options; advance to start of real regex. */ -diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h -index d8fad1e..edb36ca 100644 ---- a/src/pcre2_internal.h -+++ b/src/pcre2_internal.h -@@ -1999,6 +1999,9 @@ extern void * _pcre2_memmove(void *, const void *, size_t); - #endif - - #endif /* PCRE2_CODE_UNIT_WIDTH */ -+ -+#include "pcre2_util.h" -+ - #endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */ - - /* End of pcre2_internal.h */ -diff --git a/src/pcre2_util.h b/src/pcre2_util.h -new file mode 100644 -index 0000000..ea86355 ---- /dev/null -+++ b/src/pcre2_util.h -@@ -0,0 +1,132 @@ -+/************************************************* -+* Perl-Compatible Regular Expressions * -+*************************************************/ -+ -+/* PCRE2 is a library of functions to support regular expressions whose syntax -+and semantics are as close as possible to those of the Perl 5 language. -+ -+ Written by Philip Hazel -+ Original API code Copyright (c) 1997-2012 University of Cambridge -+ New API code Copyright (c) 2016-2024 University of Cambridge -+ -+----------------------------------------------------------------------------- -+Redistribution and use in source and binary forms, with or without -+modification, are permitted provided that the following conditions are met: -+ -+ * Redistributions of source code must retain the above copyright notice, -+ this list of conditions and the following disclaimer. -+ -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ -+ * Neither the name of the University of Cambridge nor the names of its -+ contributors may be used to endorse or promote products derived from -+ this software without specific prior written permission. -+ -+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+POSSIBILITY OF SUCH DAMAGE. -+----------------------------------------------------------------------------- -+*/ -+ -+#ifndef PCRE2_UTIL_H_IDEMPOTENT_GUARD -+#define PCRE2_UTIL_H_IDEMPOTENT_GUARD -+ -+/* Assertion macros */ -+ -+#ifdef PCRE2_DEBUG -+ -+#if defined(HAVE_ASSERT_H) && !defined(NDEBUG) -+#include -+#endif -+ -+/* PCRE2_ASSERT(x) can be used to inject an assert() for conditions -+that the code below doesn't support. It is a NOP for non debug builds -+but in debug builds will print information about the location of the -+code where it triggered and crash. -+ -+It is meant to work like assert(), and therefore the expression used -+should indicate what the expected state is, and shouldn't have any -+side-effects. */ -+ -+#if defined(HAVE_ASSERT_H) && !defined(NDEBUG) -+#define PCRE2_ASSERT(x) assert(x) -+#else -+#define PCRE2_ASSERT(x) do \ -+{ \ -+ if (!(x)) \ -+ { \ -+ fprintf(stderr, "Assertion failed at " __FILE__ ":%d\n", __LINE__); \ -+ abort(); \ -+ } \ -+} while(0) -+#endif -+ -+/* PCRE2_UNREACHABLE() can be used to mark locations on the code that -+shouldn't be reached. In non debug builds is defined as a hint for -+the compiler to eliminate any code after it, so it is useful also for -+performance reasons, but should be used with care because if it is -+ever reached will trigger Undefined Behaviour and if you are lucky a -+crash. In debug builds it will report the location where it was triggered -+and crash. One important point to consider when using this macro, is -+that it is only implemented for a few compilers, and therefore can't -+be relied on to always be active either, so if it is followed by some -+code it is important to make sure that the whole thing is safe to -+use even if the macro is not there (ex: make sure there is a `break` -+after it if used at the end of a `case`) and to test your code also -+with a configuration where the macro will be a NOP. */ -+ -+#if defined(HAVE_ASSERT_H) && !defined(NDEBUG) -+#define PCRE2_UNREACHABLE() \ -+assert(((void)"Execution reached unexpected point", 0)) -+#else -+#define PCRE2_UNREACHABLE() do \ -+{ \ -+fprintf(stderr, "Execution reached unexpected point at " __FILE__ \ -+ ":%d\n", __LINE__); \ -+abort(); \ -+} while(0) -+#endif -+ -+/* PCRE2_DEBUG_UNREACHABLE() is a debug only version of the previous -+macro. It is meant to be used in places where the code is handling -+an error situation in code that shouldn't be reached, but that has -+some sort of fallback code to normally handle the error. When in -+doubt you should use this instead of the previous macro. Like in -+the previous case, it is a good idea to document as much as possible -+the reason and the actions that should be taken if it ever triggers. */ -+ -+#define PCRE2_DEBUG_UNREACHABLE() PCRE2_UNREACHABLE() -+ -+#endif /* PCRE2_DEBUG */ -+ -+#ifndef PCRE2_DEBUG_UNREACHABLE -+#define PCRE2_DEBUG_UNREACHABLE() do {} while(0) -+#endif -+ -+#ifndef PCRE2_UNREACHABLE -+#ifdef HAVE_BUILTIN_UNREACHABLE -+#define PCRE2_UNREACHABLE() __builtin_unreachable() -+#elif defined(HAVE_BUILTIN_ASSUME) -+#define PCRE2_UNREACHABLE() __assume(0) -+#else -+#define PCRE2_UNREACHABLE() do {} while(0) -+#endif -+#endif /* !PCRE2_UNREACHABLE */ -+ -+#ifndef PCRE2_ASSERT -+#define PCRE2_ASSERT(x) do {} while(0) -+#endif -+ -+#endif /* PCRE2_UTIL_H_IDEMPOTENT_GUARD */ -+ -+/* End of pcre2_util.h */ -diff --git a/testdata/testoutput15 b/testdata/testoutput15 -index aa9c5c930..f36faeeaf 100644 ---- a/testdata/testoutput15 -+++ b/testdata/testoutput15 -@@ -111,10 +111,10 @@ Minimum depth limit = 10 - 3: ee - - /(*LIMIT_MATCH=12bc)abc/ --Failed: error 160 at offset 17: (*VERB) not recognized or malformed -+Failed: error 160 at offset 16: (*VERB) not recognized or malformed - - /(*LIMIT_MATCH=4294967290)abc/ --Failed: error 160 at offset 24: (*VERB) not recognized or malformed -+Failed: error 160 at offset 23: (*VERB) not recognized or malformed - - /(*LIMIT_DEPTH=4294967280)abc/I - Capture group count = 0 diff --git a/backport-Implement-PCRE2_EXTRA_CASELESS_RESTRICT-and-related-.patch b/backport-Implement-PCRE2_EXTRA_CASELESS_RESTRICT-and-related-.patch deleted file mode 100644 index 1a10b3202d189b9cbee83484d80deda4c829ca68..0000000000000000000000000000000000000000 --- a/backport-Implement-PCRE2_EXTRA_CASELESS_RESTRICT-and-related-.patch +++ /dev/null @@ -1,1649 +0,0 @@ -From 9a4fd79230cf583153bec4b4749a1864a55c89fb Mon Sep 17 00:00:00 2001 -From: Philip Hazel -Date: Sun, 29 Jan 2023 16:46:24 +0000 -Subject: [PATCH] Implement PCRE2_EXTRA_CASELESS_RESTRICT and related features - -Conflict:don't modify ChangeLog; don't modify maint/* because files don't -exist; adapt context; -Reference:https://github.com/PCRE2Project/pcre2/commit/c13d54f6581fa51a270a1ec40b1b7626d686dec1 - ---- - HACKING | 10 +- - src/pcre2.h.in | 3 +- - src/pcre2_compile.c | 259 ++++++++++++++++++++++++-------------- - src/pcre2_ucd.c | 6 +- - src/pcre2test.c | 21 ++-- - testdata/testinput5 | 97 ++++++++++++++ - testdata/testinput7 | 97 ++++++++++++++ - testdata/testoutput5 | 180 ++++++++++++++++++++++++++ - testdata/testoutput7 | 180 ++++++++++++++++++++++++++ - testdata/testoutput8-16-2 | 2 +- - testdata/testoutput8-8-2 | 2 +- - 11 files changed, 742 insertions(+), 115 deletions(-) - -diff --git a/HACKING b/HACKING -index 2f194db..88ebad5 100644 ---- a/HACKING -+++ b/HACKING -@@ -1,4 +1,4 @@ --Technical Notes about PCRE2 -+Technical notes about PCRE2 - --------------------------- - - These are very rough technical notes that record potentially useful information -@@ -248,7 +248,6 @@ by a length and an offset into the pattern to specify the name. - The following have one data item that follows in the next vector element: - - META_BIGVALUE Next is a literal >= META_END --META_OPTIONS (?i) and friends (data is new option bits) - META_POSIX POSIX class item (data identifies the class) - META_POSIX_NEG negative POSIX class item (ditto) - -@@ -298,6 +297,11 @@ META_MINMAX {n,m} repeat - META_MINMAX_PLUS {n,m}+ repeat - META_MINMAX_QUERY {n,m}? repeat - -+This one is followed by two elements, giving the new option settings for the -+main and extra options, respectively. -+ -+META_OPTIONS (?i) and friends -+ - This one is followed by three elements. The first is 0 for '>' and 1 for '>='; - the next two are the major and minor numbers: - -@@ -827,4 +831,4 @@ not a real opcode, but is used to check at compile time that tables indexed by - opcode are the correct length, in order to catch updating errors. - - Philip Hazel --April 2022 -+January 2023 -diff --git a/src/pcre2.h.in b/src/pcre2.h.in -index 7b8818d..60c2905 100644 ---- a/src/pcre2.h.in -+++ b/src/pcre2.h.in -@@ -5,7 +5,7 @@ - /* This is the public header file for the PCRE library, second API, to be - #included by applications that call PCRE2 functions. - -- Copyright (c) 2016-2021 University of Cambridge -+ Copyright (c) 2016-2023 University of Cambridge - - ----------------------------------------------------------------------------- - Redistribution and use in source and binary forms, with or without -@@ -153,6 +153,7 @@ D is inspected during pcre2_dfa_match() execution - #define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */ - #define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */ - #define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK 0x00000040u /* C */ -+#define PCRE2_EXTRA_CASELESS_RESTRICT 0x00000080u /* C */ - - /* These are for pcre2_jit_compile(). */ - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index 99ffd29..464c9db 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Original API code Copyright (c) 1997-2012 University of Cambridge -- New API code Copyright (c) 2016-2022 University of Cambridge -+ New API code Copyright (c) 2016-2023 University of Cambridge - - ----------------------------------------------------------------------------- - Redistribution and use in source and binary forms, with or without -@@ -118,13 +118,13 @@ them will be able to (i.e. assume a 64-bit world). */ - - #ifdef SUPPORT_UNICODE - static unsigned int -- add_list_to_class_internal(uint8_t *, PCRE2_UCHAR **, uint32_t, -+ add_list_to_class_internal(uint8_t *, PCRE2_UCHAR **, uint32_t, uint32_t, - compile_block *, const uint32_t *, unsigned int); - #endif - - static int -- compile_regex(uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t, -- uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *, -+ compile_regex(uint32_t, uint32_t, PCRE2_UCHAR **, uint32_t **, int *, -+ uint32_t, uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *, - compile_block *, PCRE2_SIZE *); - - static int -@@ -779,7 +779,7 @@ are allowed. */ - PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_UCP|PCRE2_UNGREEDY) - - #define PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS \ -- (PCRE2_EXTRA_MATCH_LINE|PCRE2_EXTRA_MATCH_WORD) -+ (PCRE2_EXTRA_MATCH_LINE|PCRE2_EXTRA_MATCH_WORD|PCRE2_EXTRA_CASELESS_RESTRICT) - - #define PUBLIC_COMPILE_EXTRA_OPTIONS \ - (PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS| \ -@@ -1059,7 +1059,10 @@ for (;;) - case META_SKIP: fprintf(stderr, "META (*SKIP)"); break; - case META_THEN: fprintf(stderr, "META (*THEN)"); break; - -- case META_OPTIONS: fprintf(stderr, "META_OPTIONS 0x%02x", *pptr++); break; -+ case META_OPTIONS: -+ fprintf(stderr, "META_OPTIONS 0x%08x 0x%08x", pptr[0], pptr[1]); -+ pptr += 2; -+ break; - - case META_LOOKBEHIND: - fprintf(stderr, "META (?<= %d offset=", meta_arg); -@@ -1491,6 +1494,7 @@ Arguments: - chptr points to a returned data character - errorcodeptr points to the errorcode variable (containing zero) - options the current options bits -+ xoptions the current extra options bits - isclass TRUE if inside a character class - cb compile data block or NULL when called from pcre2_substitute() - -@@ -1502,7 +1506,7 @@ Returns: zero => a data character - - int - PRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr, -- int *errorcodeptr, uint32_t options, uint32_t extra_options, BOOL isclass, -+ int *errorcodeptr, uint32_t options, uint32_t xoptions, BOOL isclass, - compile_block *cb) - { - BOOL utf = (options & PCRE2_UTF) != 0; -@@ -1539,7 +1543,7 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0) - if (i > 0) - { - c = (uint32_t)i; -- if (c == CHAR_CR && (extra_options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0) -+ if (c == CHAR_CR && (xoptions & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0) - c = CHAR_LF; - } - else /* Negative table entry */ -@@ -1603,7 +1607,7 @@ else - PCRE2_SPTR oldptr; - BOOL overflow; - BOOL alt_bsux = -- ((options & PCRE2_ALT_BSUX) | (extra_options & PCRE2_EXTRA_ALT_BSUX)) != 0; -+ ((options & PCRE2_ALT_BSUX) | (xoptions & PCRE2_EXTRA_ALT_BSUX)) != 0; - - /* Filter calls from pcre2_substitute(). */ - -@@ -1641,7 +1645,7 @@ else - - if (ptr >= ptrend) break; - if (*ptr == CHAR_LEFT_CURLY_BRACKET && -- (extra_options & PCRE2_EXTRA_ALT_BSUX) != 0) -+ (xoptions & PCRE2_EXTRA_ALT_BSUX) != 0) - { - PCRE2_SPTR hptr = ptr + 1; - cc = 0; -@@ -1685,7 +1689,7 @@ else - if (c > 0x10ffffU) *errorcodeptr = ERR77; - else - if (c >= 0xd800 && c <= 0xdfff && -- (extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0) -+ (xoptions & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0) - *errorcodeptr = ERR73; - } - else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR77; -@@ -1880,7 +1884,7 @@ else - else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET) - { - if (utf && c >= 0xd800 && c <= 0xdfff && -- (extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0) -+ (xoptions & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0) - { - ptr--; - *errorcodeptr = ERR73; -@@ -1953,7 +1957,7 @@ else - else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET) - { - if (utf && c >= 0xd800 && c <= 0xdfff && -- (extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0) -+ (xoptions & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0) - { - ptr--; - *errorcodeptr = ERR73; -@@ -2564,6 +2568,7 @@ typedef struct nest_save { - uint16_t max_group; - uint16_t flags; - uint32_t options; -+ uint32_t xoptions; - } nest_save; - - #define NSF_RESET 0x0001u -@@ -2578,6 +2583,8 @@ the main compiling phase. */ - #define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \ - PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \ - PCRE2_UNGREEDY) -+ -+#define PARSE_TRACKED_EXTRA_OPTIONS (PCRE2_EXTRA_CASELESS_RESTRICT) - - /* States used for analyzing ranges in character classes. The two OK values - must be last. */ -@@ -2617,7 +2624,7 @@ uint32_t *this_parsed_item = NULL; - uint32_t *prev_parsed_item = NULL; - uint32_t meta_quantifier = 0; - uint32_t add_after_mark = 0; --uint32_t extra_options = cb->cx->extra_options; -+uint32_t xoptions = cb->cx->extra_options; - uint16_t nest_depth = 0; - int after_manual_callout = 0; - int expect_cond_assert = 0; -@@ -2641,12 +2648,12 @@ nest_save *top_nest, *end_nests; - /* Insert leading items for word and line matching (features provided for the - benefit of pcre2grep). */ - --if ((extra_options & PCRE2_EXTRA_MATCH_LINE) != 0) -+if ((xoptions & PCRE2_EXTRA_MATCH_LINE) != 0) - { - *parsed_pattern++ = META_CIRCUMFLEX; - *parsed_pattern++ = META_NOCAPTURE; - } --else if ((extra_options & PCRE2_EXTRA_MATCH_WORD) != 0) -+else if ((xoptions & PCRE2_EXTRA_MATCH_WORD) != 0) - { - *parsed_pattern++ = META_ESCAPE + ESC_b; - *parsed_pattern++ = META_NOCAPTURE; -@@ -2697,6 +2704,7 @@ while (ptr < ptrend) - int prev_expect_cond_assert; - uint32_t min_repeat = 0, max_repeat = 0; - uint32_t set, unset, *optset; -+ uint32_t xset, xunset, *xoptset; - uint32_t terminator; - uint32_t prev_meta_quantifier; - BOOL prev_okquantifier; -@@ -2834,7 +2842,7 @@ while (ptr < ptrend) - if ((options & PCRE2_ALT_VERBNAMES) != 0) - { - escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options, -- cb->cx->extra_options, FALSE, cb); -+ xoptions, FALSE, cb); - if (errorcode != 0) goto FAILED; - } - else escape = 0; /* Treat all as literal */ -@@ -3029,11 +3037,11 @@ while (ptr < ptrend) - case CHAR_BACKSLASH: - tempptr = ptr; - escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options, -- cb->cx->extra_options, FALSE, cb); -+ xoptions, FALSE, cb); - if (errorcode != 0) - { - ESCAPE_FAILED: -- if ((extra_options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0) -+ if ((xoptions & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0) - goto FAILED; - ptr = tempptr; - if (ptr >= ptrend) c = CHAR_BACKSLASH; else -@@ -3607,11 +3615,11 @@ while (ptr < ptrend) - { - tempptr = ptr; - escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options, -- cb->cx->extra_options, TRUE, cb); -+ xoptions, TRUE, cb); - - if (errorcode != 0) - { -- if ((extra_options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0) -+ if ((xoptions & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0) - goto FAILED; - ptr = tempptr; - if (ptr >= ptrend) c = CHAR_BACKSLASH; else -@@ -3910,6 +3918,7 @@ while (ptr < ptrend) - top_nest->nest_depth = nest_depth; - top_nest->flags = NSF_ATOMICSR; - top_nest->options = options & PARSE_TRACKED_OPTIONS; -+ top_nest->xoptions = xoptions & PARSE_TRACKED_EXTRA_OPTIONS; - } - break; - #else /* SUPPORT_UNICODE */ -@@ -4042,6 +4051,7 @@ while (ptr < ptrend) - top_nest->nest_depth = nest_depth; - top_nest->flags = 0; - top_nest->options = options & PARSE_TRACKED_OPTIONS; -+ top_nest->xoptions = xoptions & PARSE_TRACKED_EXTRA_OPTIONS; - - /* Start of non-capturing group that resets the capture count for each - branch. */ -@@ -4056,24 +4066,28 @@ while (ptr < ptrend) - ptr++; - } - -- /* Scan for options imnsxJU to be set or unset. */ -+ /* Scan for options imnrsxJU to be set or unset. */ - - else - { - BOOL hyphenok = TRUE; - uint32_t oldoptions = options; -+ uint32_t oldxoptions = xoptions; - - top_nest->reset_group = 0; - top_nest->max_group = 0; - set = unset = 0; - optset = &set; -+ xset = xunset = 0; -+ xoptset = &xset; - -- /* ^ at the start unsets imnsx and disables the subsequent use of - */ -+ /* ^ at the start unsets irmnsx and disables the subsequent use of - */ - - if (ptr < ptrend && *ptr == CHAR_CIRCUMFLEX_ACCENT) - { - options &= ~(PCRE2_CASELESS|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| - PCRE2_DOTALL|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE); -+ xoptions &= ~(PCRE2_EXTRA_CASELESS_RESTRICT); - hyphenok = FALSE; - ptr++; - } -@@ -4091,6 +4105,7 @@ while (ptr < ptrend) - goto FAILED; - } - optset = &unset; -+ xoptset = &xunset; - hyphenok = FALSE; - break; - -@@ -4102,6 +4117,7 @@ while (ptr < ptrend) - case CHAR_i: *optset |= PCRE2_CASELESS; break; - case CHAR_m: *optset |= PCRE2_MULTILINE; break; - case CHAR_n: *optset |= PCRE2_NO_AUTO_CAPTURE; break; -+ case CHAR_r: *xoptset|= PCRE2_EXTRA_CASELESS_RESTRICT; break; - case CHAR_s: *optset |= PCRE2_DOTALL; break; - case CHAR_U: *optset |= PCRE2_UNGREEDY; break; - -@@ -4132,6 +4148,7 @@ while (ptr < ptrend) - unset |= PCRE2_EXTENDED_MORE; - - options = (options | set) & (~unset); -+ xoptions = (xoptions | xset) & (~xunset); - - /* If the options ended with ')' this is not the start of a nested - group with option changes, so the options change at this level. -@@ -4152,10 +4169,11 @@ while (ptr < ptrend) - - /* If nothing changed, no need to record. */ - -- if (options != oldoptions) -+ if (options != oldoptions || xoptions != oldxoptions) - { - *parsed_pattern++ = META_OPTIONS; - *parsed_pattern++ = options; -+ *parsed_pattern++ = xoptions; - } - } /* End options processing */ - break; /* End default case after (? */ -@@ -4625,6 +4643,7 @@ while (ptr < ptrend) - top_nest->nest_depth = nest_depth; - top_nest->flags = NSF_CONDASSERT; - top_nest->options = options & PARSE_TRACKED_OPTIONS; -+ top_nest->xoptions = xoptions & PARSE_TRACKED_EXTRA_OPTIONS; - } - break; - -@@ -4758,6 +4777,7 @@ while (ptr < ptrend) - if (top_nest != NULL && top_nest->nest_depth == nest_depth) - { - options = (options & ~PARSE_TRACKED_OPTIONS) | top_nest->options; -+ xoptions = (xoptions & ~PARSE_TRACKED_EXTRA_OPTIONS) | top_nest->xoptions; - if ((top_nest->flags & NSF_RESET) != 0 && - top_nest->max_group > cb->bracount) - cb->bracount = top_nest->max_group; -@@ -4800,12 +4820,12 @@ parsed_pattern = manage_callouts(ptr, &previous_callout, auto_callout, - /* Insert trailing items for word and line matching (features provided for the - benefit of pcre2grep). */ - --if ((extra_options & PCRE2_EXTRA_MATCH_LINE) != 0) -+if ((xoptions & PCRE2_EXTRA_MATCH_LINE) != 0) - { - *parsed_pattern++ = META_KET; - *parsed_pattern++ = META_DOLLAR; - } --else if ((extra_options & PCRE2_EXTRA_MATCH_WORD) != 0) -+else if ((xoptions & PCRE2_EXTRA_MATCH_WORD) != 0) - { - *parsed_pattern++ = META_KET; - *parsed_pattern++ = META_ESCAPE + ESC_b; -@@ -4933,7 +4953,8 @@ for (;;) - * Get othercase range * - *************************************************/ - --/* This function is passed the start and end of a class range in UCP mode. It -+/* This function is passed the start and end of a class range in UCP mode. For -+single characters the range may be just one character long. The function - searches up the characters, looking for ranges of characters in the "other" - case. Each call returns the next one, updating the start address. A character - with multiple other cases is returned on its own with a special return value. -@@ -4947,18 +4968,19 @@ Arguments: - Yield: -1 when no more - 0 when a range is returned - >0 the CASESET offset for char with multiple other cases -- in this case, ocptr contains the original -+ for this return, *ocptr contains the original - */ - - static int - get_othercase_range(uint32_t *cptr, uint32_t d, uint32_t *ocptr, -- uint32_t *odptr) -+ uint32_t *odptr, BOOL restricted) - { - uint32_t c, othercase, next; - unsigned int co; - - /* Find the first character that has an other case. If it has multiple other --cases, return its case offset value. In 32-bit mode, a value -+cases, return its case offset value. When CASELESS_RESTRICT is set, ignore the -+multi-case entries that begin with ASCII values. In 32-bit mode, a value - greater than the Unicode maximum ends the range. */ - - for (c = *cptr; c <= d; c++) -@@ -4966,12 +4988,19 @@ for (c = *cptr; c <= d; c++) - #if PCRE2_CODE_UNIT_WIDTH == 32 - if (c > MAX_UTF_CODE_POINT) return -1; - #endif -- if ((co = UCD_CASESET(c)) != 0) -+ if ((co = UCD_CASESET(c)) != 0 && -+ (!restricted || PRIV(ucd_caseless_sets)[co] > 127)) - { - *ocptr = c++; /* Character that has the set */ - *cptr = c; /* Rest of input range */ - return (int)co; - } -+ -+ /* This is not a valid multiple-case character. Check that the single other -+ case is different to the original. We don't need to check "restricted" here -+ because the non-ASCII characters with multiple cases that include an ASCII -+ character don't have a different "othercase". */ -+ - if ((othercase = UCD_OTHERCASE(c)) != c) break; - } - -@@ -5012,7 +5041,8 @@ add_to_class(). - Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data -- options the options word -+ options the options bits -+ xoptions the extra options bits - cb compile data - start start of range character - end end of range character -@@ -5023,7 +5053,8 @@ Returns: the number of < 256 characters added - - static unsigned int - add_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr, -- uint32_t options, compile_block *cb, uint32_t start, uint32_t end) -+ uint32_t options, uint32_t xoptions, compile_block *cb, uint32_t start, -+ uint32_t end) - { - uint32_t c; - uint32_t classbits_end = (end <= 0xff ? end : 0xff); -@@ -5031,8 +5062,8 @@ unsigned int n8 = 0; - - /* If caseless matching is required, scan the range and process alternate - cases. In Unicode, there are 8-bit characters that have alternate cases that --are greater than 255 and vice-versa. Sometimes we can just extend the original --range. */ -+are greater than 255 and vice-versa (though these may be ignored if caseless -+restriction is in force). Sometimes we can just extend the original range. */ - - if ((options & PCRE2_CASELESS) != 0) - { -@@ -5045,20 +5076,23 @@ if ((options & PCRE2_CASELESS) != 0) - options &= ~PCRE2_CASELESS; /* Remove for recursive calls */ - c = start; - -- while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0) -+ while ((rc = get_othercase_range(&c, end, &oc, &od, -+ (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)) >= 0) - { - /* Handle a single character that has more than one other case. */ - -- if (rc > 0) n8 += add_list_to_class_internal(classbits, uchardptr, options, cb, -- PRIV(ucd_caseless_sets) + rc, oc); -+ if (rc > 0) n8 += add_list_to_class_internal(classbits, uchardptr, -+ options, xoptions, cb, PRIV(ucd_caseless_sets) + rc, oc); - - /* Do nothing if the other case range is within the original range. */ - -- else if (oc >= cb->class_range_start && od <= cb->class_range_end) continue; -+ else if (oc >= cb->class_range_start && od <= cb->class_range_end) -+ continue; - -- /* Extend the original range if there is overlap, noting that if oc < c, we -- can't have od > end because a subrange is always shorter than the basic -- range. Otherwise, use a recursive call to add the additional range. */ -+ /* Extend the original range if there is overlap, noting that if oc < c, -+ we can't have od > end because a subrange is always shorter than the -+ basic range. Otherwise, use a recursive call to add the additional range. -+ */ - - else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */ - else if (od > end && oc <= end + 1) -@@ -5066,7 +5100,8 @@ if ((options & PCRE2_CASELESS) != 0) - end = od; /* Extend upwards */ - if (end > classbits_end) classbits_end = (end <= 0xff ? end : 0xff); - } -- else n8 += add_to_class_internal(classbits, uchardptr, options, cb, oc, od); -+ else n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, -+ cb, oc, od); - } - } - else -@@ -5165,7 +5200,8 @@ add_to_class_internal(), with which it is mutually recursive. - Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data -- options the options word -+ options the options bits -+ xoptions the extra options bits - cb contains pointers to tables etc. - p points to row of 32-bit values, terminated by NOTACHAR - except character to omit; this is used when adding lists of -@@ -5178,7 +5214,8 @@ Returns: the number of < 256 characters added - - static unsigned int - add_list_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr, -- uint32_t options, compile_block *cb, const uint32_t *p, unsigned int except) -+ uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p, -+ unsigned int except) - { - unsigned int n8 = 0; - while (p[0] < NOTACHAR) -@@ -5187,7 +5224,8 @@ while (p[0] < NOTACHAR) - if (p[0] != except) - { - while(p[n+1] == p[0] + n + 1) n++; -- n8 += add_to_class_internal(classbits, uchardptr, options, cb, p[0], p[n]); -+ n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb, -+ p[0], p[n]); - } - p += n + 1; - } -@@ -5207,7 +5245,8 @@ to avoid duplication when handling case-independence. - Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data -- options the options word -+ options the options bits -+ xoptions the extra options bits - cb compile data - start start of range character - end end of range character -@@ -5218,11 +5257,12 @@ Returns: the number of < 256 characters added - - static unsigned int - add_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options, -- compile_block *cb, uint32_t start, uint32_t end) -+ uint32_t xoptions, compile_block *cb, uint32_t start, uint32_t end) - { - cb->class_range_start = start; - cb->class_range_end = end; --return add_to_class_internal(classbits, uchardptr, options, cb, start, end); -+return add_to_class_internal(classbits, uchardptr, options, xoptions, cb, -+ start, end); - } - - -@@ -5239,7 +5279,8 @@ case-independence. - Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data -- options the options word -+ options the options bits -+ xoptions the extra options bits - cb contains pointers to tables etc. - p points to row of 32-bit values, terminated by NOTACHAR - except character to omit; this is used when adding lists of -@@ -5252,7 +5293,7 @@ Returns: the number of < 256 characters added - - static unsigned int - add_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options, -- compile_block *cb, const uint32_t *p, unsigned int except) -+ uint32_t xoptions, compile_block *cb, const uint32_t *p, unsigned int except) - { - unsigned int n8 = 0; - while (p[0] < NOTACHAR) -@@ -5263,7 +5304,8 @@ while (p[0] < NOTACHAR) - while(p[n+1] == p[0] + n + 1) n++; - cb->class_range_start = p[0]; - cb->class_range_end = p[n]; -- n8 += add_to_class_internal(classbits, uchardptr, options, cb, p[0], p[n]); -+ n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb, -+ p[0], p[n]); - } - p += n + 1; - } -@@ -5282,7 +5324,8 @@ vertical whitespace to a class. The list must be in order. - Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data -- options the options word -+ options the options bits -+ xoptions the extra options bits - cb contains pointers to tables etc. - p points to row of 32-bit values, terminated by NOTACHAR - -@@ -5292,16 +5335,16 @@ Returns: the number of < 256 characters added - - static unsigned int - add_not_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, -- uint32_t options, compile_block *cb, const uint32_t *p) -+ uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p) - { - BOOL utf = (options & PCRE2_UTF) != 0; - unsigned int n8 = 0; - if (p[0] > 0) -- n8 += add_to_class(classbits, uchardptr, options, cb, 0, p[0] - 1); -+ n8 += add_to_class(classbits, uchardptr, options, xoptions, cb, 0, p[0] - 1); - while (p[0] < NOTACHAR) - { - while (p[1] == p[0] + 1) p++; -- n8 += add_to_class(classbits, uchardptr, options, cb, p[0] + 1, -+ n8 += add_to_class(classbits, uchardptr, options, xoptions, cb, p[0] + 1, - (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1); - p++; - } -@@ -5392,6 +5435,7 @@ real compile phase. The value of lengthptr distinguishes the two phases. - - Arguments: - optionsptr pointer to the option bits -+ xoptionsptr pointer to the extra option bits - codeptr points to the pointer to the current code point - pptrptr points to the current parsed pattern pointer - errorcodeptr points to error code variable -@@ -5410,10 +5454,11 @@ Returns: 0 There's been an error, *errorcodeptr is non-zero - */ - - static int --compile_branch(uint32_t *optionsptr, PCRE2_UCHAR **codeptr, uint32_t **pptrptr, -- int *errorcodeptr, uint32_t *firstcuptr, uint32_t *firstcuflagsptr, -- uint32_t *reqcuptr, uint32_t *reqcuflagsptr, branch_chain *bcptr, -- compile_block *cb, PCRE2_SIZE *lengthptr) -+compile_branch(uint32_t *optionsptr, uint32_t *xoptionsptr, -+ PCRE2_UCHAR **codeptr, uint32_t **pptrptr, int *errorcodeptr, -+ uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr, -+ uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb, -+ PCRE2_SIZE *lengthptr) - { - int bravalue = 0; - int okreturn = -1; -@@ -5422,6 +5467,7 @@ uint32_t repeat_min = 0, repeat_max = 0; /* To please picky compilers */ - uint32_t greedy_default, greedy_non_default; - uint32_t repeat_type, op_type; - uint32_t options = *optionsptr; /* May change dynamically */ -+uint32_t xoptions = *xoptionsptr; /* May change dynamically */ - uint32_t firstcu, reqcu; - uint32_t zeroreqcu, zerofirstcu; - uint32_t escape; -@@ -5447,8 +5493,8 @@ const uint8_t *cbits = cb->cbits; - uint8_t classbits[32]; - - /* We can fish out the UTF setting once and for all into a BOOL, but we must --not do this for other options (e.g. PCRE2_EXTENDED) because they may change --dynamically as we process the pattern. */ -+not do this for other options (e.g. PCRE2_EXTENDED) that may change dynamically -+as we process the pattern. */ - - #ifdef SUPPORT_UNICODE - BOOL utf = (options & PCRE2_UTF) != 0; -@@ -5699,11 +5745,14 @@ for (;; pptr++) - - /* For caseless UTF or UCP mode, check whether this character has more - than one other case. If so, generate a special OP_NOTPROP item instead of -- OP_NOTI. */ -+ OP_NOTI. When restricted by PCRE2_EXTRA_CASELESS_RESTRICT, ignore any -+ caseless set that starts with an ASCII character. */ - - #ifdef SUPPORT_UNICODE - if ((utf||ucp) && (options & PCRE2_CASELESS) != 0 && -- (d = UCD_CASESET(c)) != 0) -+ (d = UCD_CASESET(c)) != 0 && -+ ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) == 0 || -+ PRIV(ucd_caseless_sets)[d] > 127)) - { - *code++ = OP_NOTPROP; - *code++ = PT_CLIST; -@@ -5711,7 +5760,7 @@ for (;; pptr++) - break; /* We are finished with this class */ - } - #endif -- /* Char has only one other case, or UCP not available */ -+ /* Char has only one other (usable) case, or UCP not available */ - - *code++ = ((options & PCRE2_CASELESS) != 0)? OP_NOTI: OP_NOT; - code += PUTCHAR(c, code); -@@ -5721,7 +5770,9 @@ for (;; pptr++) - /* Handle character classes that contain more than just one literal - character. If there are exactly two characters in a positive class, see if - they are case partners. This can be optimized to generate a caseless single -- character match (which also sets first/required code units if relevant). */ -+ character match (which also sets first/required code units if relevant). -+ When casing restrictions apply, ignore a caseless set if both characters -+ are ASCII. */ - - if (meta == META_CLASS && pptr[1] < META_END && pptr[2] < META_END && - pptr[3] == META_CLASS_END) -@@ -5729,7 +5780,9 @@ for (;; pptr++) - uint32_t c = pptr[1]; - - #ifdef SUPPORT_UNICODE -- if (UCD_CASESET(c) == 0) -+ if (UCD_CASESET(c) == 0 || -+ ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0 && -+ c < 128 && pptr[2] < 128)) - #endif - { - uint32_t d; -@@ -5981,22 +6034,24 @@ for (;; pptr++) - - case ESC_h: - (void)add_list_to_class(classbits, &class_uchardata, -- options & ~PCRE2_CASELESS, cb, PRIV(hspace_list), NOTACHAR); -+ options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list), -+ NOTACHAR); - break; - - case ESC_H: - (void)add_not_list_to_class(classbits, &class_uchardata, -- options & ~PCRE2_CASELESS, cb, PRIV(hspace_list)); -+ options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list)); - break; - - case ESC_v: - (void)add_list_to_class(classbits, &class_uchardata, -- options & ~PCRE2_CASELESS, cb, PRIV(vspace_list), NOTACHAR); -+ options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list), -+ NOTACHAR); - break; - - case ESC_V: - (void)add_not_list_to_class(classbits, &class_uchardata, -- options & ~PCRE2_CASELESS, cb, PRIV(vspace_list)); -+ options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list)); - break; - - /* If Unicode is not supported, \P and \p are not allowed and are -@@ -6070,32 +6125,32 @@ for (;; pptr++) - if (C <= CHAR_i) - { - class_has_8bitchar += -- add_to_class(classbits, &class_uchardata, options, cb, C + uc, -- ((D < CHAR_i)? D : CHAR_i) + uc); -+ add_to_class(classbits, &class_uchardata, options, xoptions, -+ cb, C + uc, ((D < CHAR_i)? D : CHAR_i) + uc); - C = CHAR_j; - } - - if (C <= D && C <= CHAR_r) - { - class_has_8bitchar += -- add_to_class(classbits, &class_uchardata, options, cb, C + uc, -- ((D < CHAR_r)? D : CHAR_r) + uc); -+ add_to_class(classbits, &class_uchardata, options, xoptions, -+ cb, C + uc, ((D < CHAR_r)? D : CHAR_r) + uc); - C = CHAR_s; - } - - if (C <= D) - { - class_has_8bitchar += -- add_to_class(classbits, &class_uchardata, options, cb, C + uc, -- D + uc); -+ add_to_class(classbits, &class_uchardata, options, xoptions, -+ cb, C + uc, D + uc); - } - } - else - #endif - /* Not an EBCDIC special range */ - -- class_has_8bitchar += -- add_to_class(classbits, &class_uchardata, options, cb, c, d); -+ class_has_8bitchar += add_to_class(classbits, &class_uchardata, -+ options, xoptions, cb, c, d); - goto CONTINUE_CLASS; /* Go get the next char in the class */ - } /* End of range handling */ - -@@ -6103,7 +6158,8 @@ for (;; pptr++) - /* Handle a single character. */ - - class_has_8bitchar += -- add_to_class(classbits, &class_uchardata, options, cb, meta, meta); -+ add_to_class(classbits, &class_uchardata, options, xoptions, cb, -+ meta, meta); - } - - /* Continue to the next item in the class. */ -@@ -6341,6 +6397,7 @@ for (;; pptr++) - - case META_OPTIONS: - *optionsptr = options = *(++pptr); -+ *xoptionsptr = xoptions = *(++pptr); - greedy_default = ((options & PCRE2_UNGREEDY) != 0); - greedy_non_default = greedy_default ^ 1; - req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS : 0; -@@ -6586,7 +6643,8 @@ for (;; pptr++) - - if ((group_return = - compile_regex( -- options, /* The option state */ -+ options, /* The options state */ -+ xoptions, /* The extra options state */ - &tempcode, /* Where to put code (updated) */ - &pptr, /* Input pointer (updated) */ - errorcodeptr, /* Where to put an error message */ -@@ -7925,7 +7983,7 @@ for (;; pptr++) - done. However, there's an option, in case anyone was relying on it. */ - - if (cb->assert_depth > 0 && meta_arg == ESC_K && -- (cb->cx->extra_options & PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) == 0) -+ (xoptions & PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) == 0) - { - *errorcodeptr = ERR99; - return 0; -@@ -7977,13 +8035,16 @@ for (;; pptr++) - - /* For caseless UTF or UCP mode, check whether this character has more than - one other case. If so, generate a special OP_PROP item instead of OP_CHARI. -- */ -+ When casing restrictions apply, ignore caseless sets that start with an -+ ASCII character. */ - - #ifdef SUPPORT_UNICODE - if ((utf||ucp) && (options & PCRE2_CASELESS) != 0) - { - uint32_t caseset = UCD_CASESET(meta); -- if (caseset != 0) -+ if (caseset != 0 && -+ ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) == 0 || -+ PRIV(ucd_caseless_sets)[caseset] > 127)) - { - *code++ = OP_PROP; - *code++ = PT_CLIST; -@@ -8099,6 +8160,7 @@ the two phases. - - Arguments: - options option bits, including any changes for this subpattern -+ xoptions extra option bits, ditto - codeptr -> the address of the current code pointer - pptrptr -> the address of the current parsed pattern pointer - errorcodeptr -> pointer to error code variable -@@ -8118,10 +8180,11 @@ Returns: 0 There has been an error - */ - - static int --compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, uint32_t **pptrptr, -- int *errorcodeptr, uint32_t skipunits, uint32_t *firstcuptr, -- uint32_t *firstcuflagsptr, uint32_t *reqcuptr, uint32_t *reqcuflagsptr, -- branch_chain *bcptr, compile_block *cb, PCRE2_SIZE *lengthptr) -+compile_regex(uint32_t options, uint32_t xoptions, PCRE2_UCHAR **codeptr, -+ uint32_t **pptrptr, int *errorcodeptr, uint32_t skipunits, -+ uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr, -+ uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb, -+ PCRE2_SIZE *lengthptr) - { - PCRE2_UCHAR *code = *codeptr; - PCRE2_UCHAR *last_branch = code; -@@ -8217,9 +8280,9 @@ for (;;) - into the length. */ - - if ((branch_return = -- compile_branch(&options, &code, &pptr, errorcodeptr, &branchfirstcu, -- &branchfirstcuflags, &branchreqcu, &branchreqcuflags, &bc, -- cb, (lengthptr == NULL)? NULL : &length)) == 0) -+ compile_branch(&options, &xoptions, &code, &pptr, errorcodeptr, -+ &branchfirstcu, &branchfirstcuflags, &branchreqcu, &branchreqcuflags, -+ &bc, cb, (lengthptr == NULL)? NULL : &length)) == 0) - return 0; - - /* If a branch can match an empty string, so can the whole group. */ -@@ -9224,7 +9287,7 @@ for (;; pptr++) - break; - - case META_OPTIONS: -- pptr += 1; -+ pptr += 2; - break; - - case META_BIGVALUE: -@@ -9727,7 +9790,6 @@ for (; *pptr != META_END; pptr++) - break; - - case META_BIGVALUE: -- case META_OPTIONS: - case META_POSIX: - case META_POSIX_NEG: - pptr += 1; -@@ -9736,6 +9798,7 @@ for (; *pptr != META_END; pptr++) - case META_MINMAX: - case META_MINMAX_QUERY: - case META_MINMAX_PLUS: -+ case META_OPTIONS: - pptr += 2; - break; - -@@ -10251,8 +10314,9 @@ pptr = cb.parsed_pattern; - code = cworkspace; - *code = OP_BRA; - --(void)compile_regex(cb.external_options, &code, &pptr, &errorcode, 0, &firstcu, -- &firstcuflags, &reqcu, &reqcuflags, NULL, &cb, &length); -+(void)compile_regex(cb.external_options, ccontext->extra_options, &code, &pptr, -+ &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, &cb, -+ &length); - - if (errorcode != 0) goto HAD_CB_ERROR; /* Offset is in cb.erroroffset */ - -@@ -10349,8 +10413,9 @@ of the function here. */ - pptr = cb.parsed_pattern; - code = (PCRE2_UCHAR *)codestart; - *code = OP_BRA; --regexrc = compile_regex(re->overall_options, &code, &pptr, &errorcode, 0, -- &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, &cb, NULL); -+regexrc = compile_regex(re->overall_options, ccontext->extra_options, &code, -+ &pptr, &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, -+ &cb, NULL); - if (regexrc < 0) re->flags |= PCRE2_MATCH_EMPTY; - re->top_bracket = cb.bracount; - re->top_backref = cb.top_backref; -diff --git a/src/pcre2_ucd.c b/src/pcre2_ucd.c -index 5e0fc37..a72944c 100644 ---- a/src/pcre2_ucd.c -+++ b/src/pcre2_ucd.c -@@ -68,7 +68,7 @@ the tables when not needed. But don't leave a totally empty module because some - compilers barf at that. Instead, just supply some small dummy tables. */ - - #ifndef SUPPORT_UNICODE --const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0,0,0 }}; -+const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0,0,0}}; - const uint16_t PRIV(ucd_stage1)[] = {0}; - const uint16_t PRIV(ucd_stage2)[] = {0}; - const uint32_t PRIV(ucd_caseless_sets)[] = {0}; -@@ -498,7 +498,7 @@ const ucd_record PRIV(ucd_records)[] = { /* 16908 bytes, record size 12 */ - { 0, 5, 12, 0, 0, 18432, 60, }, /* 70 */ - { 0, 5, 12, 0, 0, 18432, 80, }, /* 71 */ - { 0, 9, 12, 0, -121, 18432, 74, }, /* 72 */ -- { 0, 5, 12, 1, -268, 18432, 70, }, /* 73 */ -+ { 0, 5, 12, 1, 0, 18432, 70, }, /* 73 */ - { 0, 5, 12, 0, 195, 18432, 76, }, /* 74 */ - { 0, 9, 12, 0, 210, 18432, 74, }, /* 75 */ - { 0, 9, 12, 0, 206, 18432, 74, }, /* 76 */ -@@ -1155,7 +1155,7 @@ const ucd_record PRIV(ucd_records)[] = { /* 16908 bytes, record size 12 */ - { 69, 26, 14, 0, 0, 28672, 236, }, /* 727 */ - { 1, 9, 12, 96, -7517, 18432, 74, }, /* 728 */ - { 69, 26, 12, 0, 0, 28672, 118, }, /* 729 */ -- { 0, 9, 12, 100, -8383, 18432, 74, }, /* 730 */ -+ { 0, 9, 12, 100, 0, 18432, 74, }, /* 730 */ - { 0, 9, 12, 104, -8262, 18432, 74, }, /* 731 */ - { 69, 26, 12, 0, 0, 14336, 238, }, /* 732 */ - { 0, 9, 12, 0, 28, 18432, 74, }, /* 733 */ -diff --git a/src/pcre2test.c b/src/pcre2test.c -index 4fa5884..e768798 100644 ---- a/src/pcre2test.c -+++ b/src/pcre2test.c -@@ -653,6 +653,7 @@ static modstruct modlist[] = { - { "callout_no_where", MOD_DAT, MOD_CTL, CTL2_CALLOUT_NO_WHERE, DO(control2) }, - { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) }, - { "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) }, -+ { "caseless_restrict", MOD_CTC, MOD_OPT, PCRE2_EXTRA_CASELESS_RESTRICT, CO(extra_options) }, - { "convert", MOD_PAT, MOD_CON, 0, PO(convert_type) }, - { "convert_glob_escape", MOD_PAT, MOD_CHR, 0, PO(convert_glob_escape) }, - { "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) }, -@@ -833,14 +834,15 @@ typedef struct c1modstruct { - } c1modstruct; - - static c1modstruct c1modlist[] = { -- { "bincode", 'B', -1 }, -- { "info", 'I', -1 }, -- { "global", 'g', -1 }, -- { "caseless", 'i', -1 }, -- { "multiline", 'm', -1 }, -- { "no_auto_capture", 'n', -1 }, -- { "dotall", 's', -1 }, -- { "extended", 'x', -1 } -+ { "bincode", 'B', -1 }, -+ { "info", 'I', -1 }, -+ { "global", 'g', -1 }, -+ { "caseless", 'i', -1 }, -+ { "multiline", 'm', -1 }, -+ { "no_auto_capture", 'n', -1 }, -+ { "caseless_restrict", 'r', -1 }, -+ { "dotall", 's', -1 }, -+ { "extended", 'x', -1 } - }; - - #define C1MODLISTCOUNT sizeof(c1modlist)/sizeof(c1modstruct) -@@ -4257,7 +4259,7 @@ show_compile_extra_options(uint32_t options, const char *before, - const char *after) - { - if (options == 0) fprintf(outfile, "%s %s", before, after); --else fprintf(outfile, "%s%s%s%s%s%s%s%s", -+else fprintf(outfile, "%s%s%s%s%s%s%s%s%s", - before, - ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "", - ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "", -@@ -4265,6 +4267,7 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s", - ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "", - ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "", - ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "", -+ ((options & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)? " caseless_restrict" : "", - after); - } - -diff --git a/testdata/testinput5 b/testdata/testinput5 -index 6bd352f..b817423 100644 ---- a/testdata/testinput5 -+++ b/testdata/testinput5 -@@ -2212,4 +2212,101 @@ - - /\p{\2b[:xäigi:t:_/ - -+# Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without -+# the restriction. -+ -+/AskZ/i,utf,caseless_restrict -+ AskZ -+ aSKz -+\= Expect no match -+ A\x{17f}kZ -+ As\x{212a}Z -+ -+/AskZ/i,utf -+ AskZ -+ aSKz -+ A\x{17f}kZ -+ As\x{212a}Z -+ -+/A\x{17f}\x{212a}Z/ir,utf -+ \= Expect no match -+ AskZ -+ -+/A\x{17f}\x{212a}Z/i,utf -+ AskZ -+ -+/[AskZ]+/i,utf,caseless_restrict -+ AskZ -+ aSKz -+ A\x{17f}kZ -+ As\x{212a}Z -+ -+/[AskZ]+/i,utf -+ AskZ -+ aSKz -+ A\x{17f}kZ -+ As\x{212a}Z -+ -+/[\x{17f}\x{212a}]+/ir,utf -+\= Expect no match -+ AskZ -+ -+/[\x{17f}\x{212a}]+/i,utf -+ AskZ -+ -+/[^s]+/ir,utf -+ A\x{17f}Z -+ -+/[^s]+/i,utf -+ A\x{17f}Z -+ -+/[^k]+/ir,utf -+ A\x{212a}Z -+ -+/[^k]+/i,utf -+ A\x{212a}Z -+ -+/[^sk]+/ir,utf -+ A\x{17f}\x{212a}Z -+ -+/[^sk]+/i,utf -+ A\x{17f}\x{212a}Z -+ -+/[^\x{17f}]+/ir,utf -+ AsSZ -+ -+/[^\x{17f}]+/i,utf -+ AsSZ -+ -+/[Ss]+/irB,utf -+ Sss\x{17f}ss -+ -+/[Ss]+/iB,utf -+ Sss\x{17f}ss -+ -+/[S\x{17f}]/irB,utf -+ -+/[S\x{17f}]/iB,utf -+ -+/[\x{17f}s]/irB,utf -+ -+/[\x{17f}s]/iB,utf -+ -+/[\x{4b}\x{6b}]/irB,utf -+ -+/[\x{4b}\x{6b}]/iB,utf -+ -+/s(?r)s(?-r)s(?r:s)s/i,utf -+ \x{17f}S\x{17f}S\x{17f} -+\= Expect no match -+ \x{17f}\x{17f}\x{17f}S\x{17f} -+ \x{17f}S\x{17f}\x{17f}\x{17f} -+ -+/k(?^i)k/ir,utf -+ K\x{212a} -+\= Expect no match -+ \x{212a}\x{212a} -+ -+# End caseless restrict tests -+ - # End of testinput5 -diff --git a/testdata/testinput7 b/testdata/testinput7 -index 2d90b41..991de88 100644 ---- a/testdata/testinput7 -+++ b/testdata/testinput7 -@@ -2231,4 +2231,101 @@ - /\p{sc:katakana}{3,}?/utf - \x{30a1}\x{30fa}\x{32d0}\x{1b122}\x{ff66}\x{3001}ABC - -+# Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without -+# the restriction. -+ -+/AskZ/i,utf,caseless_restrict -+ AskZ -+ aSKz -+\= Expect no match -+ A\x{17f}kZ -+ As\x{212a}Z -+ -+/AskZ/i,utf -+ AskZ -+ aSKz -+ A\x{17f}kZ -+ As\x{212a}Z -+ -+/A\x{17f}\x{212a}Z/ir,utf -+ \= Expect no match -+ AskZ -+ -+/A\x{17f}\x{212a}Z/i,utf -+ AskZ -+ -+/[AskZ]+/i,utf,caseless_restrict -+ AskZ -+ aSKz -+ A\x{17f}kZ -+ As\x{212a}Z -+ -+/[AskZ]+/i,utf -+ AskZ -+ aSKz -+ A\x{17f}kZ -+ As\x{212a}Z -+ -+/[\x{17f}\x{212a}]+/ir,utf -+\= Expect no match -+ AskZ -+ -+/[\x{17f}\x{212a}]+/i,utf -+ AskZ -+ -+/[^s]+/ir,utf -+ A\x{17f}Z -+ -+/[^s]+/i,utf -+ A\x{17f}Z -+ -+/[^k]+/ir,utf -+ A\x{212a}Z -+ -+/[^k]+/i,utf -+ A\x{212a}Z -+ -+/[^sk]+/ir,utf -+ A\x{17f}\x{212a}Z -+ -+/[^sk]+/i,utf -+ A\x{17f}\x{212a}Z -+ -+/[^\x{17f}]+/ir,utf -+ AsSZ -+ -+/[^\x{17f}]+/i,utf -+ AsSZ -+ -+/[Ss]+/irB,utf -+ Sss\x{17f}ss -+ -+/[Ss]+/iB,utf -+ Sss\x{17f}ss -+ -+/[S\x{17f}]/irB,utf -+ -+/[S\x{17f}]/iB,utf -+ -+/[\x{17f}s]/irB,utf -+ -+/[\x{17f}s]/iB,utf -+ -+/[\x{4b}\x{6b}]/irB,utf -+ -+/[\x{4b}\x{6b}]/iB,utf -+ -+/s(?r)s(?-r)s(?r:s)s/i,utf -+ \x{17f}S\x{17f}S\x{17f} -+\= Expect no match -+ \x{17f}\x{17f}\x{17f}S\x{17f} -+ \x{17f}S\x{17f}\x{17f}\x{17f} -+ -+/k(?^i)k/ir,utf -+ K\x{212a} -+\= Expect no match -+ \x{212a}\x{212a} -+ -+# End caseless restrict tests -+ - # End of testinput7 -diff --git a/testdata/testoutput5 b/testdata/testoutput5 -index 2c3fe94..db42a11 100644 ---- a/testdata/testoutput5 -+++ b/testdata/testoutput5 -@@ -5016,4 +5016,184 @@ Failed: error 147 at offset 8: unknown property after \P or \p - /\p{\2b[:xäigi:t:_/ - Failed: error 146 at offset 17: malformed \P or \p sequence - -+# Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without -+# the restriction. -+ -+/AskZ/i,utf,caseless_restrict -+ AskZ -+ 0: AskZ -+ aSKz -+ 0: aSKz -+\= Expect no match -+ A\x{17f}kZ -+No match -+ As\x{212a}Z -+No match -+ -+/AskZ/i,utf -+ AskZ -+ 0: AskZ -+ aSKz -+ 0: aSKz -+ A\x{17f}kZ -+ 0: A\x{17f}kZ -+ As\x{212a}Z -+ 0: As\x{212a}Z -+ -+/A\x{17f}\x{212a}Z/ir,utf -+ \= Expect no match -+ AskZ -+No match -+ -+/A\x{17f}\x{212a}Z/i,utf -+ AskZ -+ 0: AskZ -+ -+/[AskZ]+/i,utf,caseless_restrict -+ AskZ -+ 0: AskZ -+ aSKz -+ 0: aSKz -+ A\x{17f}kZ -+ 0: A -+ As\x{212a}Z -+ 0: As -+ -+/[AskZ]+/i,utf -+ AskZ -+ 0: AskZ -+ aSKz -+ 0: aSKz -+ A\x{17f}kZ -+ 0: A\x{17f}kZ -+ As\x{212a}Z -+ 0: As\x{212a}Z -+ -+/[\x{17f}\x{212a}]+/ir,utf -+\= Expect no match -+ AskZ -+No match -+ -+/[\x{17f}\x{212a}]+/i,utf -+ AskZ -+ 0: sk -+ -+/[^s]+/ir,utf -+ A\x{17f}Z -+ 0: A\x{17f}Z -+ -+/[^s]+/i,utf -+ A\x{17f}Z -+ 0: A -+ -+/[^k]+/ir,utf -+ A\x{212a}Z -+ 0: A\x{212a}Z -+ -+/[^k]+/i,utf -+ A\x{212a}Z -+ 0: A -+ -+/[^sk]+/ir,utf -+ A\x{17f}\x{212a}Z -+ 0: A\x{17f}\x{212a}Z -+ -+/[^sk]+/i,utf -+ A\x{17f}\x{212a}Z -+ 0: A -+ -+/[^\x{17f}]+/ir,utf -+ AsSZ -+ 0: AsSZ -+ -+/[^\x{17f}]+/i,utf -+ AsSZ -+ 0: A -+ -+/[Ss]+/irB,utf -+------------------------------------------------------------------ -+ Bra -+ /i S++ -+ Ket -+ End -+------------------------------------------------------------------ -+ Sss\x{17f}ss -+ 0: Sss -+ -+/[Ss]+/iB,utf -+------------------------------------------------------------------ -+ Bra -+ [Ss\x{17f}\x{17f}]++ -+ Ket -+ End -+------------------------------------------------------------------ -+ Sss\x{17f}ss -+ 0: Sss\x{17f}ss -+ -+/[S\x{17f}]/irB,utf -+------------------------------------------------------------------ -+ Bra -+ [Ss\x{17f}] -+ Ket -+ End -+------------------------------------------------------------------ -+ -+/[S\x{17f}]/iB,utf -+------------------------------------------------------------------ -+ Bra -+ [Ss\x{17f}\x{17f}] -+ Ket -+ End -+------------------------------------------------------------------ -+ -+/[\x{17f}s]/irB,utf -+------------------------------------------------------------------ -+ Bra -+ [Ss\x{17f}] -+ Ket -+ End -+------------------------------------------------------------------ -+ -+/[\x{17f}s]/iB,utf -+------------------------------------------------------------------ -+ Bra -+ [Ss\x{17f}\x{17f}] -+ Ket -+ End -+------------------------------------------------------------------ -+ -+/[\x{4b}\x{6b}]/irB,utf -+------------------------------------------------------------------ -+ Bra -+ /i K -+ Ket -+ End -+------------------------------------------------------------------ -+ -+/[\x{4b}\x{6b}]/iB,utf -+------------------------------------------------------------------ -+ Bra -+ [Kk\x{212a}\x{212a}] -+ Ket -+ End -+------------------------------------------------------------------ -+ -+/s(?r)s(?-r)s(?r:s)s/i,utf -+ \x{17f}S\x{17f}S\x{17f} -+ 0: \x{17f}S\x{17f}S\x{17f} -+\= Expect no match -+ \x{17f}\x{17f}\x{17f}S\x{17f} -+No match -+ \x{17f}S\x{17f}\x{17f}\x{17f} -+No match -+ -+/k(?^i)k/ir,utf -+ K\x{212a} -+ 0: K\x{212a} -+\= Expect no match -+ \x{212a}\x{212a} -+No match -+ -+# End caseless restrict tests -+ - # End of testinput5 -diff --git a/testdata/testoutput7 b/testdata/testoutput7 -index 6e71fc8..c2291a1 100644 ---- a/testdata/testoutput7 -+++ b/testdata/testoutput7 -@@ -3756,4 +3756,184 @@ No match - 1: \x{30a1}\x{30fa}\x{32d0}\x{1b122} - 2: \x{30a1}\x{30fa}\x{32d0} - -+# Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without -+# the restriction. -+ -+/AskZ/i,utf,caseless_restrict -+ AskZ -+ 0: AskZ -+ aSKz -+ 0: aSKz -+\= Expect no match -+ A\x{17f}kZ -+No match -+ As\x{212a}Z -+No match -+ -+/AskZ/i,utf -+ AskZ -+ 0: AskZ -+ aSKz -+ 0: aSKz -+ A\x{17f}kZ -+ 0: A\x{17f}kZ -+ As\x{212a}Z -+ 0: As\x{212a}Z -+ -+/A\x{17f}\x{212a}Z/ir,utf -+ \= Expect no match -+ AskZ -+No match -+ -+/A\x{17f}\x{212a}Z/i,utf -+ AskZ -+ 0: AskZ -+ -+/[AskZ]+/i,utf,caseless_restrict -+ AskZ -+ 0: AskZ -+ aSKz -+ 0: aSKz -+ A\x{17f}kZ -+ 0: A -+ As\x{212a}Z -+ 0: As -+ -+/[AskZ]+/i,utf -+ AskZ -+ 0: AskZ -+ aSKz -+ 0: aSKz -+ A\x{17f}kZ -+ 0: A\x{17f}kZ -+ As\x{212a}Z -+ 0: As\x{212a}Z -+ -+/[\x{17f}\x{212a}]+/ir,utf -+\= Expect no match -+ AskZ -+No match -+ -+/[\x{17f}\x{212a}]+/i,utf -+ AskZ -+ 0: sk -+ -+/[^s]+/ir,utf -+ A\x{17f}Z -+ 0: A\x{17f}Z -+ -+/[^s]+/i,utf -+ A\x{17f}Z -+ 0: A -+ -+/[^k]+/ir,utf -+ A\x{212a}Z -+ 0: A\x{212a}Z -+ -+/[^k]+/i,utf -+ A\x{212a}Z -+ 0: A -+ -+/[^sk]+/ir,utf -+ A\x{17f}\x{212a}Z -+ 0: A\x{17f}\x{212a}Z -+ -+/[^sk]+/i,utf -+ A\x{17f}\x{212a}Z -+ 0: A -+ -+/[^\x{17f}]+/ir,utf -+ AsSZ -+ 0: AsSZ -+ -+/[^\x{17f}]+/i,utf -+ AsSZ -+ 0: A -+ -+/[Ss]+/irB,utf -+------------------------------------------------------------------ -+ Bra -+ /i S++ -+ Ket -+ End -+------------------------------------------------------------------ -+ Sss\x{17f}ss -+ 0: Sss -+ -+/[Ss]+/iB,utf -+------------------------------------------------------------------ -+ Bra -+ [Ss\x{17f}\x{17f}]++ -+ Ket -+ End -+------------------------------------------------------------------ -+ Sss\x{17f}ss -+ 0: Sss\x{17f}ss -+ -+/[S\x{17f}]/irB,utf -+------------------------------------------------------------------ -+ Bra -+ [Ss\x{17f}] -+ Ket -+ End -+------------------------------------------------------------------ -+ -+/[S\x{17f}]/iB,utf -+------------------------------------------------------------------ -+ Bra -+ [Ss\x{17f}\x{17f}] -+ Ket -+ End -+------------------------------------------------------------------ -+ -+/[\x{17f}s]/irB,utf -+------------------------------------------------------------------ -+ Bra -+ [Ss\x{17f}] -+ Ket -+ End -+------------------------------------------------------------------ -+ -+/[\x{17f}s]/iB,utf -+------------------------------------------------------------------ -+ Bra -+ [Ss\x{17f}\x{17f}] -+ Ket -+ End -+------------------------------------------------------------------ -+ -+/[\x{4b}\x{6b}]/irB,utf -+------------------------------------------------------------------ -+ Bra -+ /i K -+ Ket -+ End -+------------------------------------------------------------------ -+ -+/[\x{4b}\x{6b}]/iB,utf -+------------------------------------------------------------------ -+ Bra -+ [Kk\x{212a}\x{212a}] -+ Ket -+ End -+------------------------------------------------------------------ -+ -+/s(?r)s(?-r)s(?r:s)s/i,utf -+ \x{17f}S\x{17f}S\x{17f} -+ 0: \x{17f}S\x{17f}S\x{17f} -+\= Expect no match -+ \x{17f}\x{17f}\x{17f}S\x{17f} -+No match -+ \x{17f}S\x{17f}\x{17f}\x{17f} -+No match -+ -+/k(?^i)k/ir,utf -+ K\x{212a} -+ 0: K\x{212a} -+\= Expect no match -+ \x{212a}\x{212a} -+No match -+ -+# End caseless restrict tests -+ - # End of testinput7 -diff --git a/testdata/testoutput8-16-2 b/testdata/testoutput8-16-2 -index 569a860..49b1022 100644 ---- a/testdata/testoutput8-16-2 -+++ b/testdata/testoutput8-16-2 -@@ -838,7 +838,7 @@ Memory allocation (code space): 14 - /(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| - ))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) - /parens_nest_limit=1000,-fullbincode --Failed: error 184 at offset 1504: (?| and/or (?J: or (?x: parentheses are too deeply nested -+Failed: error 184 at offset 1129: (?| and/or (?J: or (?x: parentheses are too deeply nested - - # Use "expand" to create some very long patterns with nested parentheses, in - # order to test workspace overflow. Again, this varies with code unit width, -diff --git a/testdata/testoutput8-8-2 b/testdata/testoutput8-8-2 -index 8393d5c..e9568e5 100644 ---- a/testdata/testoutput8-8-2 -+++ b/testdata/testoutput8-8-2 -@@ -838,7 +838,7 @@ Memory allocation (code space): 10 - /(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| - ))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) - /parens_nest_limit=1000,-fullbincode --Failed: error 184 at offset 1504: (?| and/or (?J: or (?x: parentheses are too deeply nested -+Failed: error 184 at offset 1129: (?| and/or (?J: or (?x: parentheses are too deeply nested - - # Use "expand" to create some very long patterns with nested parentheses, in - # order to test workspace overflow. Again, this varies with code unit width, --- -2.23.0 - diff --git a/backport-Improve-error-message-for-N-name-in-character-classes.patch b/backport-Improve-error-message-for-N-name-in-character-classes.patch deleted file mode 100644 index a336b800a7dd240d9d03457d0242bb91c90965d9..0000000000000000000000000000000000000000 --- a/backport-Improve-error-message-for-N-name-in-character-classes.patch +++ /dev/null @@ -1,68 +0,0 @@ -From d704ee40c5324e5ff6c08f009a7aaa3b67b71565 Mon Sep 17 00:00:00 2001 -From: Nicholas Wilson -Date: Fri, 27 Sep 2024 16:31:01 +0100 -Subject: [PATCH] Improve error message for \N{name} in character classes - (#502) - ---- - src/pcre2_compile.c | 8 ++++++++ - testdata/testinput2 | 6 ++++++ - testdata/testoutput2 | 9 +++++++++ - 3 files changed, 23 insertions(+) - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index ec4940e63..fd554f1d2 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -1542,6 +1542,14 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0) - #endif - } - -+ /* Give an error in contexts where quantifiers are not allowed -+ (character classes; substitution strings). */ -+ -+ else if (isclassorsub || cb == NULL) -+ { -+ *errorcodeptr = ERR37; -+ } -+ - /* Give an error if what follows is not a quantifier, but don't override - an error set by the quantifier reader (e.g. number overflow). */ - -diff --git a/testdata/testinput2 b/testdata/testinput2 -index c6ee980..a33d987 100644 ---- a/testdata/testinput2 -+++ b/testdata/testinput2 -@@ -913,6 +913,12 @@ - - /\U/I - -+/[\N]/ -+ -+/[\N{4}]/ -+ -+/[\N{name}]/ -+ - /a{1,3}b/ungreedy - ab - -diff --git a/testdata/testoutput2 b/testdata/testoutput2 -index 2f2b3d1..4c07b72 100644 ---- a/testdata/testoutput2 -+++ b/testdata/testoutput2 -@@ -3245,6 +3245,15 @@ Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, - /\U/I - Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u - -+/[\N]/ -+Failed: error 171 at offset 3: \N is not supported in a class -+ -+/[\N{4}]/ -+Failed: error 137 at offset 3: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u -+ -+/[\N{name}]/ -+Failed: error 137 at offset 3: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u -+ - /a{1,3}b/ungreedy - ab - 0: ab diff --git a/backport-Improve-error-offsets-for-character-classes-548.patch b/backport-Improve-error-offsets-for-character-classes-548.patch deleted file mode 100644 index 0a714e1611601d5c896f52bfdaa0bc22100fff41..0000000000000000000000000000000000000000 --- a/backport-Improve-error-offsets-for-character-classes-548.patch +++ /dev/null @@ -1,425 +0,0 @@ -From 6185344ed8617ff84a08764e808e5b3667c34a7a Mon Sep 17 00:00:00 2001 -From: Nicholas Wilson -Date: Wed, 6 Nov 2024 08:45:46 +0000 -Subject: [PATCH] Improve error offsets for character classes (#548) - -Conflict:don't modify alt_extended_class because fc38d9e784 is not merged; -don't modify class_op_state because class_op_state is not merged; adapt context -Reference:https://github.com/PCRE2Project/pcre2/commit/6185344ed8617ff84a08764e808e5b3667c34a7a - -* Error offset should be advanced by one character for "[\d-z]" - invalid range error - - The code does a 1-char lookahead for a hyphen, but then doesn't - advance the pointer to consume the hyphen when returning the error. - - Perl's error message (with "use warnings") does advance to just - after the hyphen, so PCRE2 should match. - - Fixes #545. - -* Also improve error offsets for [[:bad:]], [[=...=]] and [z-\p{...}] - cases ---- - src/pcre2_compile.c | 67 +++++++++++++++++++------------------- - testdata/testinput2 | 8 +++++ - testdata/testinput5 | 8 +++++ - testdata/testoutput2 | 76 +++++++++++++++++++++++++------------------- - testdata/testoutput5 | 14 +++++++- - 5 files changed, 106 insertions(+), 67 deletions(-) - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index 32db44db..290e759b 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -3563,6 +3563,7 @@ while (ptr < ptrend) - - if (class_range_state == RANGE_STARTED) - { -+ ptr = tempptr + 2; - errorcode = ERR50; - goto FAILED; - } -@@ -3584,8 +3585,9 @@ while (ptr < ptrend) - - if (*ptr != CHAR_COLON) - { -+ ptr = tempptr + 2; - errorcode = ERR13; -- goto FAILED_BACK; -+ goto FAILED; - } - - if (*(++ptr) == CHAR_CIRCUMFLEX_ACCENT) -@@ -3595,19 +3597,18 @@ while (ptr < ptrend) - } - - posix_class = check_posix_name(ptr, (int)(tempptr - ptr)); -+ ptr = tempptr + 2; - if (posix_class < 0) - { - errorcode = ERR30; - goto FAILED; - } -- ptr = tempptr + 2; - - /* Set "a hyphen is forbidden to be the start of a range". For the '-]' - case, the hyphen is treated as a literal, but for '-1' it is disallowed - (because it would be interpreted as range). */ - - class_range_state = RANGE_FORBID_NO; -- class_range_forbid_ptr = ptr; - - /* When PCRE2_UCP is set, unless PCRE2_EXTRA_ASCII_POSIX is set, some - of the POSIX classes are converted to use Unicode properties \p or \P -@@ -3664,6 +3665,7 @@ while (ptr < ptrend) - { - *parsed_pattern++ = CHAR_MINUS; - class_range_state = RANGE_FORBID_STARTED; -+ class_range_forbid_ptr = ptr; - } - - /* Handle a literal character */ -@@ -3746,37 +3748,8 @@ while (ptr < ptrend) - errorcode = ERR7; - ptr--; - goto FAILED; -- } - -- /* The second part of a range can be a single-character escape -- sequence (detected above), but not any of the other escapes. Perl -- treats a hyphen as a literal in such circumstances. However, in Perl's -- warning mode, a warning is given, so PCRE now faults it, as it is -- almost certainly a mistake on the user's part. */ -- -- if (class_range_state == RANGE_STARTED) -- { -- errorcode = ERR50; -- goto FAILED; -- } -- /* Perl gives a warning unless the hyphen following a multi-character -- escape is the last character in the class. PCRE throws an error. */ -- if (class_range_state == RANGE_FORBID_STARTED) -- { -- ptr = class_range_forbid_ptr; -- errorcode = ERR50; -- goto FAILED; -- } -- -- /* Of the remaining escapes, only those that define characters are -- allowed in a class. None may start a range. */ -- -- class_range_state = RANGE_FORBID_NO; -- class_range_forbid_ptr = ptr; -- -- switch(escape) -- { -- case ESC_N: -+ case ESC_N: /* Not permitted by Perl either */ - errorcode = ERR71; - goto FAILED; - -@@ -3813,7 +3786,6 @@ while (ptr < ptrend) - if (negated) escape = (escape == ESC_P)? ESC_p : ESC_P; - *parsed_pattern++ = META_ESCAPE + escape; - *parsed_pattern++ = (ptype << 16) | pdata; -- class_range_forbid_ptr = ptr; - } - #else - errorcode = ERR45; -@@ -3826,6 +3798,33 @@ while (ptr < ptrend) - ptr--; - goto FAILED; - } -+ -+ /* All the switch-cases above which end in "break" describe a set -+ of characters. None may start a range. */ -+ -+ /* The second part of a range can be a single-character escape -+ sequence (detected above), but not any of the other escapes. Perl -+ treats a hyphen as a literal in such circumstances. However, in Perl's -+ warning mode, a warning is given, so PCRE now faults it, as it is -+ almost certainly a mistake on the user's part. */ -+ -+ if (class_range_state == RANGE_STARTED) -+ { -+ errorcode = ERR50; -+ goto FAILED; -+ } -+ -+ /* Perl gives a warning unless the hyphen following a multi-character -+ escape is the last character in the class. PCRE throws an error. */ -+ -+ if (class_range_state == RANGE_FORBID_STARTED) -+ { -+ ptr = class_range_forbid_ptr; -+ errorcode = ERR50; -+ goto FAILED; -+ } -+ -+ class_range_state = RANGE_FORBID_NO; - } - - /* Proceed to next thing in the class. */ -diff --git a/testdata/testinput2 b/testdata/testinput2 -index 61b94e69..1fbb778e 100644 ---- a/testdata/testinput2 -+++ b/testdata/testinput2 -@@ -7008,4 +7008,12 @@ a)"xI - - /[[:digit:]\Q\E-H]+/ - -+/[z-[:space:]]/ -+ -+/[z-\d]/ -+ -+/[[:space:]-z]/ -+ -+/[\d-z]/ -+ - # End of testinput2 -diff --git a/testdata/testinput5 b/testdata/testinput5 -index 494371b5..f3faeb8f 100644 ---- a/testdata/testinput5 -+++ b/testdata/testinput5 -@@ -2458,4 +2458,12 @@ - /abc/utf,substitute_extended,replace=>\777< - abc - -+/[z-\p{Lu}]/ -+ -+/[z-\pL]/ -+ -+/[\p{Lu}-z]/ -+ -+/[\pL-z]/ -+ - # End of testinput5 -diff --git a/testdata/testoutput2 b/testdata/testoutput2 -index 86bfe964..99714596 100644 ---- a/testdata/testoutput2 -+++ b/testdata/testoutput2 -@@ -2176,13 +2176,13 @@ Starting code units: % 0 1 A B C D E F G H I J K L M N O P Q R S T U V W - Subject length lower bound = 1 - - /[[.ch.]]/I --Failed: error 113 at offset 1: POSIX collating elements are not supported -+Failed: error 113 at offset 7: POSIX collating elements are not supported - - /[[=ch=]]/I --Failed: error 113 at offset 1: POSIX collating elements are not supported -+Failed: error 113 at offset 7: POSIX collating elements are not supported - - /[[:rhubarb:]]/I --Failed: error 130 at offset 3: unknown POSIX class name -+Failed: error 130 at offset 12: unknown POSIX class name - - /[[:upper:]]/Ii - Capture group count = 0 -@@ -8722,31 +8722,31 @@ Failed: error 162 at offset 4: subpattern name expected - Failed: error 162 at offset 4: subpattern name expected - - /[[:foo:]]/ --Failed: error 130 at offset 3: unknown POSIX class name -+Failed: error 130 at offset 8: unknown POSIX class name - - /[[:1234:]]/ --Failed: error 130 at offset 3: unknown POSIX class name -+Failed: error 130 at offset 9: unknown POSIX class name - - /[[:f\oo:]]/ --Failed: error 130 at offset 3: unknown POSIX class name -+Failed: error 130 at offset 9: unknown POSIX class name - - /[[: :]]/ --Failed: error 130 at offset 3: unknown POSIX class name -+Failed: error 130 at offset 6: unknown POSIX class name - - /[[:...:]]/ --Failed: error 130 at offset 3: unknown POSIX class name -+Failed: error 130 at offset 8: unknown POSIX class name - - /[[:l\ower:]]/ --Failed: error 130 at offset 3: unknown POSIX class name -+Failed: error 130 at offset 11: unknown POSIX class name - - /[[:abc\:]]/ --Failed: error 130 at offset 3: unknown POSIX class name -+Failed: error 130 at offset 9: unknown POSIX class name - - /[abc[:x\]pqr:]]/ --Failed: error 130 at offset 6: unknown POSIX class name -+Failed: error 130 at offset 14: unknown POSIX class name - - /[[:a\dz:]]/ --Failed: error 130 at offset 3: unknown POSIX class name -+Failed: error 130 at offset 9: unknown POSIX class name - - /(^(a|b\g<-1'c))/ - Failed: error 157 at offset 8: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number -@@ -11409,7 +11409,7 @@ Failed: error 171 at offset 4: \N is not supported in a class - aNc - - /a[B-\Nc]/ --Failed: error 150 at offset 6: invalid range in character class -+Failed: error 171 at offset 6: \N is not supported in a class - - /a[B\Nc]/ - Failed: error 171 at offset 5: \N is not supported in a class -@@ -13232,16 +13232,16 @@ Failed: error 178 at offset 5: digits missing in \x{} or \o{} or \N{U+} - ------------------------------------------------------------------ - - /[a-[:digit:]]+/ --Failed: error 150 at offset 4: invalid range in character class -+Failed: error 150 at offset 12: invalid range in character class - - /[A-[:digit:]]+/ --Failed: error 150 at offset 4: invalid range in character class -+Failed: error 150 at offset 12: invalid range in character class - - /[a-[.xxx.]]+/ --Failed: error 150 at offset 4: invalid range in character class -+Failed: error 150 at offset 10: invalid range in character class - - /[a-[=xxx=]]+/ --Failed: error 150 at offset 4: invalid range in character class -+Failed: error 150 at offset 10: invalid range in character class - - /[a-[!xxx!]]+/ - Failed: error 108 at offset 3: range out of order in character class -@@ -13362,7 +13362,7 @@ No match - No match - - /[a[:<:]] should give error/ --Failed: error 130 at offset 4: unknown POSIX class name -+Failed: error 130 at offset 7: unknown POSIX class name - - /(?=ab\K)/aftertext,allow_lookaround_bsk - abcd\=startchar -@@ -15510,11 +15510,11 @@ Failed: error 125 at offset 13: lookbehind assertion is not fixed length - # Perl accepts these, but gives a warning. We can't warn, so give an error. - - /[a-[:digit:]]+/ --Failed: error 150 at offset 4: invalid range in character class -+Failed: error 150 at offset 12: invalid range in character class - a-a9-a - - /[A-[:digit:]]+/ --Failed: error 150 at offset 4: invalid range in character class -+Failed: error 150 at offset 12: invalid range in character class - A-A9-A - - /[a-\d]+/ -@@ -15651,7 +15651,7 @@ Failed: error 128 at offset 63: assertion expected after (?( or (?(?C) - .+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X - - /[:[:alnum:]-[[a:lnum:]+/ --Failed: error 150 at offset 11: invalid range in character class -+Failed: error 150 at offset 12: invalid range in character class - - /((?(?C'')\QX\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ - Failed: error 128 at offset 11: assertion expected after (?( or (?(?C) -@@ -16285,10 +16285,10 @@ Subject length lower bound = 3 - ------------------------------------------------------------------ - - /[Q-\N]/B,bad_escape_is_literal --Failed: error 150 at offset 5: invalid range in character class -+Failed: error 171 at offset 5: \N is not supported in a class - - /[\s-_]/bad_escape_is_literal --Failed: error 150 at offset 3: invalid range in character class -+Failed: error 150 at offset 4: invalid range in character class - - /[_-\s]/bad_escape_is_literal - Failed: error 150 at offset 5: invalid range in character class -@@ -16443,19 +16443,19 @@ No match - No match - - /[[:digit:]-a]/ --Failed: error 150 at offset 10: invalid range in character class -+Failed: error 150 at offset 11: invalid range in character class - - /[[:digit:]-[:print:]]/ --Failed: error 150 at offset 10: invalid range in character class -+Failed: error 150 at offset 11: invalid range in character class - - /[\d-a]/ --Failed: error 150 at offset 3: invalid range in character class -+Failed: error 150 at offset 4: invalid range in character class - - /[\H-z]/ --Failed: error 150 at offset 3: invalid range in character class -+Failed: error 150 at offset 4: invalid range in character class - - /[\d-[:print:]]/ --Failed: error 150 at offset 3: invalid range in character class -+Failed: error 150 at offset 4: invalid range in character class - - # Perl gets the second of these wrong, giving no match. - -@@ -17816,16 +17816,28 @@ Subject length lower bound = 2 - 0: a - - /[[:digit:] -Z]/xx --Failed: error 150 at offset 10: invalid range in character class -+Failed: error 150 at offset 14: invalid range in character class - - /[\d -Z]/xx --Failed: error 150 at offset 3: invalid range in character class -+Failed: error 150 at offset 7: invalid range in character class - - /[[:digit:]\E-H]/ --Failed: error 150 at offset 10: invalid range in character class -+Failed: error 150 at offset 13: invalid range in character class - - /[[:digit:]\Q\E-H]+/ --Failed: error 150 at offset 10: invalid range in character class -+Failed: error 150 at offset 15: invalid range in character class -+ -+/[z-[:space:]]/ -+Failed: error 150 at offset 12: invalid range in character class -+ -+/[z-\d]/ -+Failed: error 150 at offset 5: invalid range in character class -+ -+/[[:space:]-z]/ -+Failed: error 150 at offset 11: invalid range in character class -+ -+/[\d-z]/ -+Failed: error 150 at offset 4: invalid range in character class - - # End of testinput2 - Error -70: PCRE2_ERROR_BADDATA (unknown error number) -diff --git a/testdata/testoutput5 b/testdata/testoutput5 -index bf06ee12..0dba11c6 100644 ---- a/testdata/testoutput5 -+++ b/testdata/testoutput5 -@@ -795,7 +795,7 @@ No match - No match - - /[[:a\x{100}b:]]/utf --Failed: error 130 at offset 3: unknown POSIX class name -+Failed: error 130 at offset 14: unknown POSIX class name - - /a[^]b/utf,allow_empty_class,match_unset_backref - a\x{1234}b -@@ -5403,4 +5403,16 @@ No match - abc - 1: >\x{1ff}< - -+/[z-\p{Lu}]/ -+Failed: error 150 at offset 9: invalid range in character class -+ -+/[z-\pL]/ -+Failed: error 150 at offset 6: invalid range in character class -+ -+/[\p{Lu}-z]/ -+Failed: error 150 at offset 8: invalid range in character class -+ -+/[\pL-z]/ -+Failed: error 150 at offset 5: invalid range in character class -+ - # End of testinput5 --- -2.33.0 - diff --git a/backport-Mend-a-bug-in-pcre2grep-that-caused-separator-lines-.patch b/backport-Mend-a-bug-in-pcre2grep-that-caused-separator-lines-.patch deleted file mode 100644 index 9dc37f352aa62b5f7523e0ca0fd1d8c2780dcddc..0000000000000000000000000000000000000000 --- a/backport-Mend-a-bug-in-pcre2grep-that-caused-separator-lines-.patch +++ /dev/null @@ -1,223 +0,0 @@ -From f34fc0a34ab18d7cb0ff27eacaea43912d797a27 Mon Sep 17 00:00:00 2001 -From: Philip Hazel -Date: Wed, 27 Nov 2024 15:15:45 +0000 -Subject: [PATCH] Mend a bug in pcre2grep that caused separator lines to -be - incorrectly inserted in some cases when above/below context lines are - contiguous. Reported by Alejandro Colomar . Fixes -GitHub - issue #577. - -Conflict:adapt context; don't modify ChangeLog; don't use -group_separator because e179a4b8c is not merged -Reference:https://github.com/PCRE2Project/pcre2/commit/f34fc0a34ab18d7cb0ff27eacaea43912d797a27 - ---- - RunGrepTest | 6 +++++- - src/pcre2grep.c | 19 ++++++++++++++++-- - testdata/grepinput | 19 ++++++++++++++++++ - testdata/grepoutput | 48 ++++++++++++++++++++++++++++++++++++--------- - 4 files changed, 80 insertions(+), 12 deletions(-) - -diff --git a/RunGrepTest b/RunGrepTest -index 0a00e82..0d57707 100755 ---- a/RunGrepTest -+++ b/RunGrepTest -@@ -853,7 +853,11 @@ fi - echo "---------------------------- Test 151 -----------------------------" >>testtrygrep - (cd $srcdir; $valgrind $vjs $pcre2grep --colour=always -e this -e The -e 'The wo' testdata/grepinputv) >>testtrygrep - -- -+echo "---------------------------- Test 160 -----------------------------" >>testtrygrep -+(cd $srcdir; $valgrind $vjs $pcre2grep -nC3 '^(ert|jkl)' ./testdata/grepinput) >>testtrygrep -+echo "RC=$?" >>testtrygrep -+(cd $srcdir; $valgrind $vjs $pcre2grep -n -B4 -A2 '^(ert|dfg)' ./testdata/grepinput) >>testtrygrep -+echo "RC=$?" >>testtrygrep - - - # Now compare the results. -diff --git a/src/pcre2grep.c b/src/pcre2grep.c -index 6a5841c..3b79f26 100644 ---- a/src/pcre2grep.c -+++ b/src/pcre2grep.c -@@ -2940,12 +2940,15 @@ while (ptr < endptr) - FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout); - lastmatchrestart = pp; - } -+ - if (lastmatchrestart != ptr) hyphenpending = TRUE; - } - -- /* If there were non-contiguous lines printed above, insert hyphens. */ -+ /* If hyphenpending is TRUE when there is no "after" context, it means we -+ are at the start of a new file, having output something from the previous -+ file. Output a separator if enabled.*/ - -- if (hyphenpending) -+ else if (hyphenpending) - { - fprintf(stdout, "--" STDOUT_NL); - hyphenpending = FALSE; -@@ -2970,6 +2973,7 @@ while (ptr < endptr) - - if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted) - fprintf(stdout, "--" STDOUT_NL); -+ hyphenpending = FALSE; - - while (p < ptr) - { -@@ -2984,12 +2988,23 @@ while (ptr < endptr) - } - } - -+ /* If hyphenpending is TRUE here, it was set after outputting some -+ "after" lines (and there are no "before" lines). */ -+ -+ else if (hyphenpending) -+ { -+ fprintf(stdout, "--" STDOUT_NL); -+ hyphenpending = FALSE; -+ hyphenprinted = TRUE; -+ } -+ - /* Now print the matching line(s); ensure we set hyphenpending at the end - of the file if any context lines are being output. */ - - if (after_context > 0 || before_context > 0) - endhyphenpending = TRUE; - -+ - if (printname != NULL) fprintf(stdout, "%s%c", printname, - printname_colon); - if (number) fprintf(stdout, "%lu:", linenumber); -diff --git a/testdata/grepinput b/testdata/grepinput -index 1e2ceb4..91d3db8 100644 ---- a/testdata/grepinput -+++ b/testdata/grepinput -@@ -617,6 +617,25 @@ match 5: - Rhubarb - Custard Tart - -+zxc -+cvb -+bnm -+asd -+qwe -+ert -+tyu -+uio -+ggg -+asd -+dfg -+ghj -+jkl -+abc -+def -+ghi -+xyz -+ -+ - PUT NEW DATA ABOVE THIS LINE. - ============================= - -diff --git a/testdata/grepoutput b/testdata/grepoutput -index aa53aab..df658ed 100644 ---- a/testdata/grepoutput -+++ b/testdata/grepoutput -@@ -10,7 +10,7 @@ RC=0 - 7:PATTERN at the start of a line. - 8:In the middle of a line, PATTERN appears. - 10:This pattern is in lower case. --623:Check up on PATTERN near the end. -+642:Check up on PATTERN near the end. - RC=0 - ---------------------------- Test 4 ------------------------------ - 4 -@@ -19,7 +19,7 @@ RC=0 - ./testdata/grepinput:7:PATTERN at the start of a line. - ./testdata/grepinput:8:In the middle of a line, PATTERN appears. - ./testdata/grepinput:10:This pattern is in lower case. --./testdata/grepinput:623:Check up on PATTERN near the end. -+./testdata/grepinput:642:Check up on PATTERN near the end. - ./testdata/grepinputx:3:Here is the pattern again. - ./testdata/grepinputx:5:Pattern - ./testdata/grepinputx:42:This line contains pattern not on a line by itself. -@@ -28,7 +28,7 @@ RC=0 - 7:PATTERN at the start of a line. - 8:In the middle of a line, PATTERN appears. - 10:This pattern is in lower case. --623:Check up on PATTERN near the end. -+642:Check up on PATTERN near the end. - 3:Here is the pattern again. - 5:Pattern - 42:This line contains pattern not on a line by itself. -@@ -104,6 +104,7 @@ pcre2grep: Error in command-line regex at offset 4: quantifier does not follow a - RC=2 - ---------------------------- Test 16 ----------------------------- - pcre2grep: Failed to open ./testdata/nonexistfile: No such file or directory -+./testdata/grepinput:abc - RC=2 - ---------------------------- Test 17 ----------------------------- - features should be added at the end, because some of the tests involve the -@@ -324,10 +325,10 @@ RC=0 - ./testdata/grepinput-9- - ./testdata/grepinput:10:This pattern is in lower case. - -- --./testdata/grepinput-620-PUT NEW DATA ABOVE THIS LINE. --./testdata/grepinput-621-============================= --./testdata/grepinput-622- --./testdata/grepinput:623:Check up on PATTERN near the end. -+./testdata/grepinput-639-PUT NEW DATA ABOVE THIS LINE. -+./testdata/grepinput-640-============================= -+./testdata/grepinput-641- -+./testdata/grepinput:642:Check up on PATTERN near the end. - -- - ./testdata/grepinputx-1-This is a second file of input for the pcregrep tests. - ./testdata/grepinputx-2- -@@ -349,8 +350,8 @@ RC=0 - ./testdata/grepinput-12-Here follows a whole lot of stuff that makes the file over 24KiB long. - ./testdata/grepinput-13- - -- --./testdata/grepinput:623:Check up on PATTERN near the end. --./testdata/grepinput-624-This is the last line of this file. -+./testdata/grepinput:642:Check up on PATTERN near the end. -+./testdata/grepinput-643-This is the last line of this file. - -- - ./testdata/grepinputx:3:Here is the pattern again. - ./testdata/grepinputx-4- -@@ -1232,3 +1233,32 @@ RC=2 - The word is cat in this line - The caterpillar sat on the mat - The snowcat is not an animal -+---------------------------- Test 160 ----------------------------- -+622-bnm -+623-asd -+624-qwe -+625:ert -+626-tyu -+627-uio -+628-ggg -+629-asd -+630-dfg -+631-ghj -+632:jkl -+633-abc -+634-def -+635-ghi -+RC=0 -+621-cvb -+622-bnm -+623-asd -+624-qwe -+625:ert -+626-tyu -+627-uio -+628-ggg -+629-asd -+630:dfg -+631-ghj -+632-jkl -+RC=0 --- -2.33.0 - diff --git a/backport-Non-recursive-scan-prefix-in-JIT-560.patch b/backport-Non-recursive-scan-prefix-in-JIT-560.patch deleted file mode 100644 index f4ac1b57395db48f5bc46a71a1f88e1e71d37756..0000000000000000000000000000000000000000 --- a/backport-Non-recursive-scan-prefix-in-JIT-560.patch +++ /dev/null @@ -1,459 +0,0 @@ -From 6f2da25f009ff463cd9357ae5ebe452fbec8ab5c Mon Sep 17 00:00:00 2001 -From: Zoltan Herczeg -Date: Fri, 15 Nov 2024 13:21:03 +0100 -Subject: [PATCH] Non-recursive scan prefix in JIT (#560) - -Conflict:NA -Reference:https://github.com/PCRE2Project/pcre2/commit/6f2da25f009ff463cd9357ae5ebe452fbec8ab5c - ---- - src/pcre2_jit_compile.c | 238 ++++++++++++++++++++++++++++------------ - src/pcre2_jit_test.c | 1 + - 2 files changed, 168 insertions(+), 71 deletions(-) - -diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c -index 127c393d..4449d59f 100644 ---- a/src/pcre2_jit_compile.c -+++ b/src/pcre2_jit_compile.c -@@ -5670,11 +5670,38 @@ if (last) - chars->last_count++; - } - --static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count) -+/* Value can be increased if needed. Patterns -+such as /(a|){33}b/ can exhaust the stack. -+ -+Note: /(a|){29}b/ already stops scan_prefix() -+because it reaches the maximum step_count. */ -+#define SCAN_PREFIX_STACK_END 32 -+ -+/* -+Scan prefix stores the prefix string in the chars array. -+The elements of the chars array is either small character -+sets or "any" (count is set to 255). -+ -+Examples (the chars array is represented by a simple regex): -+ -+/(abc|xbyd)/ prefix: /[ax]b[cy]/ (length: 3) -+/a[a-z]b+c/ prefix: a.b (length: 3) -+/ab?cd/ prefix: a[bc][cd] (length: 3) -+/(ab|cd)|(ef|gh)/ prefix: [aceg][bdfh] (length: 2) -+ -+The length is returned by scan_prefix(). The length is -+less than or equal than the minimum length of the pattern. -+*/ -+ -+static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars) - { --/* Recursive function, which scans prefix literals. */ -+fast_forward_char_data *chars_start = chars; -+fast_forward_char_data *chars_end = chars + MAX_N_CHARS; -+PCRE2_SPTR cc_stack[SCAN_PREFIX_STACK_END]; -+fast_forward_char_data *chars_stack[SCAN_PREFIX_STACK_END]; -+sljit_u8 next_alternative_stack[SCAN_PREFIX_STACK_END]; - BOOL last, any, class, caseless; --int len, repeat, len_save, consumed = 0; -+int stack_ptr, step_count, repeat, len, len_save; - sljit_u32 chr; /* Any unicode character. */ - sljit_u8 *bytes, *bytes_end, byte; - PCRE2_SPTR alternative, cc_save, oc; -@@ -5687,11 +5714,44 @@ PCRE2_UCHAR othercase[1]; - #endif - - repeat = 1; -+stack_ptr = 0; -+step_count = 10000; - while (TRUE) - { -- if (*rec_count == 0) -+ if (--step_count == 0) - return 0; -- (*rec_count)--; -+ -+ SLJIT_ASSERT(chars <= chars_start + MAX_N_CHARS); -+ -+ if (chars >= chars_end) -+ { -+ if (stack_ptr == 0) -+ return (int)(chars_end - chars_start); -+ -+ --stack_ptr; -+ cc = cc_stack[stack_ptr]; -+ chars = chars_stack[stack_ptr]; -+ -+ if (chars >= chars_end) -+ continue; -+ -+ if (next_alternative_stack[stack_ptr] != 0) -+ { -+ /* When an alternative is processed, the -+ next alternative is pushed onto the stack. */ -+ SLJIT_ASSERT(*cc == OP_ALT); -+ alternative = cc + GET(cc, 1); -+ if (*alternative == OP_ALT) -+ { -+ SLJIT_ASSERT(stack_ptr < SCAN_PREFIX_STACK_END); -+ SLJIT_ASSERT(chars_stack[stack_ptr] == chars); -+ SLJIT_ASSERT(next_alternative_stack[stack_ptr] == 1); -+ cc_stack[stack_ptr] = alternative; -+ stack_ptr++; -+ } -+ cc += 1 + LINK_SIZE; -+ } -+ } - - last = TRUE; - any = FALSE; -@@ -5768,9 +5828,17 @@ while (TRUE) - #ifdef SUPPORT_UNICODE - if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc); - #endif -- max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count); -- if (max_chars == 0) -- return consumed; -+ if (stack_ptr >= SCAN_PREFIX_STACK_END) -+ { -+ chars_end = chars; -+ continue; -+ } -+ -+ cc_stack[stack_ptr] = cc + len; -+ chars_stack[stack_ptr] = chars; -+ next_alternative_stack[stack_ptr] = 0; -+ stack_ptr++; -+ - last = FALSE; - break; - -@@ -5788,12 +5856,18 @@ while (TRUE) - case OP_CBRA: - case OP_CBRAPOS: - alternative = cc + GET(cc, 1); -- while (*alternative == OP_ALT) -+ if (*alternative == OP_ALT) - { -- max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count); -- if (max_chars == 0) -- return consumed; -- alternative += GET(alternative, 1); -+ if (stack_ptr >= SCAN_PREFIX_STACK_END) -+ { -+ chars_end = chars; -+ continue; -+ } -+ -+ cc_stack[stack_ptr] = alternative; -+ chars_stack[stack_ptr] = chars; -+ next_alternative_stack[stack_ptr] = 1; -+ stack_ptr++; - } - - if (*cc == OP_CBRA || *cc == OP_CBRAPOS) -@@ -5804,14 +5878,21 @@ while (TRUE) - case OP_CLASS: - #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE)) -- return consumed; -+ { -+ chars_end = chars; -+ continue; -+ } - #endif - class = TRUE; - break; - - case OP_NCLASS: - #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -- if (common->utf) return consumed; -+ if (common->utf) -+ { -+ chars_end = chars; -+ continue; -+ } - #endif - class = TRUE; - break; -@@ -5819,7 +5900,11 @@ while (TRUE) - #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 - case OP_XCLASS: - #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -- if (common->utf) return consumed; -+ if (common->utf) -+ { -+ chars_end = chars; -+ continue; -+ } - #endif - any = TRUE; - cc += GET(cc, 1); -@@ -5829,7 +5914,10 @@ while (TRUE) - case OP_DIGIT: - #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE)) -- return consumed; -+ { -+ chars_end = chars; -+ continue; -+ } - #endif - any = TRUE; - cc++; -@@ -5838,7 +5926,10 @@ while (TRUE) - case OP_WHITESPACE: - #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE)) -- return consumed; -+ { -+ chars_end = chars; -+ continue; -+ } - #endif - any = TRUE; - cc++; -@@ -5847,7 +5938,10 @@ while (TRUE) - case OP_WORDCHAR: - #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE)) -- return consumed; -+ { -+ chars_end = chars; -+ continue; -+ } - #endif - any = TRUE; - cc++; -@@ -5863,7 +5957,11 @@ while (TRUE) - case OP_ANY: - case OP_ALLANY: - #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -- if (common->utf) return consumed; -+ if (common->utf) -+ { -+ chars_end = chars; -+ continue; -+ } - #endif - any = TRUE; - cc++; -@@ -5873,7 +5971,11 @@ while (TRUE) - case OP_NOTPROP: - case OP_PROP: - #if PCRE2_CODE_UNIT_WIDTH != 32 -- if (common->utf) return consumed; -+ if (common->utf) -+ { -+ chars_end = chars; -+ continue; -+ } - #endif - any = TRUE; - cc += 1 + 2; -@@ -5888,7 +5990,11 @@ while (TRUE) - case OP_NOTEXACT: - case OP_NOTEXACTI: - #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -- if (common->utf) return consumed; -+ if (common->utf) -+ { -+ chars_end = chars; -+ continue; -+ } - #endif - any = TRUE; - repeat = GET2(cc, 1); -@@ -5896,21 +6002,20 @@ while (TRUE) - break; - - default: -- return consumed; -+ chars_end = chars; -+ continue; - } - -+ SLJIT_ASSERT(chars < chars_end); -+ - if (any) - { - do - { - chars->count = 255; -- -- consumed++; -- if (--max_chars == 0) -- return consumed; - chars++; - } -- while (--repeat > 0); -+ while (--repeat > 0 && chars < chars_end); - - repeat = 1; - continue; -@@ -5921,17 +6026,27 @@ while (TRUE) - bytes = (sljit_u8*) (cc + 1); - cc += 1 + 32 / sizeof(PCRE2_UCHAR); - -+ SLJIT_ASSERT(last == TRUE && repeat == 1); - switch (*cc) - { -- case OP_CRSTAR: -- case OP_CRMINSTAR: -- case OP_CRPOSSTAR: - case OP_CRQUERY: - case OP_CRMINQUERY: - case OP_CRPOSQUERY: -- max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count); -- if (max_chars == 0) -- return consumed; -+ last = FALSE; -+ /* Fall through */ -+ case OP_CRSTAR: -+ case OP_CRMINSTAR: -+ case OP_CRPOSSTAR: -+ if (stack_ptr >= SCAN_PREFIX_STACK_END) -+ { -+ chars_end = chars; -+ continue; -+ } -+ -+ cc_stack[stack_ptr] = ++cc; -+ chars_stack[stack_ptr] = chars; -+ next_alternative_stack[stack_ptr] = 0; -+ stack_ptr++; - break; - - default: -@@ -5945,7 +6060,13 @@ while (TRUE) - case OP_CRPOSRANGE: - repeat = GET2(cc, 1); - if (repeat <= 0) -- return consumed; -+ { -+ chars_end = chars; -+ continue; -+ } -+ -+ last = (repeat != (int)GET2(cc, 1 + IMM2_SIZE)); -+ cc += 1 + 2 * IMM2_SIZE; - break; - } - -@@ -5980,36 +6101,13 @@ while (TRUE) - bytes = bytes_end - 32; - } - -- consumed++; -- if (--max_chars == 0) -- return consumed; - chars++; - } -- while (--repeat > 0); -- -- switch (*cc) -- { -- case OP_CRSTAR: -- case OP_CRMINSTAR: -- case OP_CRPOSSTAR: -- return consumed; -- -- case OP_CRQUERY: -- case OP_CRMINQUERY: -- case OP_CRPOSQUERY: -- cc++; -- break; -- -- case OP_CRRANGE: -- case OP_CRMINRANGE: -- case OP_CRPOSRANGE: -- if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE)) -- return consumed; -- cc += 1 + 2 * IMM2_SIZE; -- break; -- } -+ while (--repeat > 0 && chars < chars_end); - - repeat = 1; -+ if (last) -+ chars_end = chars; - continue; - } - -@@ -6025,7 +6123,10 @@ while (TRUE) - { - GETCHAR(chr, cc); - if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len) -- return consumed; -+ { -+ chars_end = chars; -+ continue; -+ } - } - else - #endif -@@ -6056,7 +6157,6 @@ while (TRUE) - do - { - len--; -- consumed++; - - chr = *cc; - add_prefix_char(*cc, chars, len == 0); -@@ -6064,15 +6164,13 @@ while (TRUE) - if (caseless) - add_prefix_char(*oc, chars, len == 0); - -- if (--max_chars == 0) -- return consumed; - chars++; - cc++; - oc++; - } -- while (len > 0); -+ while (len > 0 && chars < chars_end); - -- if (--repeat == 0) -+ if (--repeat == 0 || chars >= chars_end) - break; - - len = len_save; -@@ -6081,7 +6179,7 @@ while (TRUE) - - repeat = 1; - if (last) -- return consumed; -+ chars_end = chars; - } - } - -@@ -6251,7 +6349,6 @@ int i, max, from; - int range_right = -1, range_len; - sljit_u8 *update_table = NULL; - BOOL in_range; --sljit_u32 rec_count; - - for (i = 0; i < MAX_N_CHARS; i++) - { -@@ -6259,8 +6356,7 @@ for (i = 0; i < MAX_N_CHARS; i++) - chars[i].last_count = 0; - } - --rec_count = 10000; --max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count); -+max = scan_prefix(common, common->start, chars); - - if (max < 1) - return FALSE; -diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c -index 28bc7af9..066095fe 100644 ---- a/src/pcre2_jit_test.c -+++ b/src/pcre2_jit_test.c -@@ -286,6 +286,7 @@ static struct regression_test_case regression_test_cases[] = { - { CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" }, - { MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" }, - { MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" }, -+ { M, A, 0, 0, "(?:a?|a)b", "ba" }, - - /* Greedy and non-greedy + operators */ - { MU, A, 0, 0, "(aa)+aa", "aaaaaaa" }, --- -2.33.0 - diff --git a/backport-Remove-incorrect-optimization-in-DFA-matching-when-p.patch b/backport-Remove-incorrect-optimization-in-DFA-matching-when-p.patch deleted file mode 100644 index ee851c9c70eabe02ced2517a530936076f3ac402..0000000000000000000000000000000000000000 --- a/backport-Remove-incorrect-optimization-in-DFA-matching-when-p.patch +++ /dev/null @@ -1,118 +0,0 @@ -From 674b6640e702705e8e90125f972197fae3aa364d Mon Sep 17 00:00:00 2001 -From: Philip Hazel -Date: Sat, 3 Aug 2024 17:18:56 +0100 -Subject: [PATCH] Remove incorrect optimization in DFA matching when partial - matching and (*F) are involved - -Conflict:don't modify ChangeLog; adapt context -Reference:https://github.com/PCRE2Project/pcre2/commit/674b6640e702705e8e90125f972197fae3aa364d - ---- - src/pcre2_dfa_match.c | 10 +--------- - testdata/testinput6 | 9 +++++++++ - testdata/testoutput6 | 13 +++++++++++++ - 3 files changed, 23 insertions(+), 9 deletions(-) - -diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c -index 60f6b4f..1c4495f 100644 ---- a/src/pcre2_dfa_match.c -+++ b/src/pcre2_dfa_match.c -@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Original API code Copyright (c) 1997-2012 University of Cambridge -- New API code Copyright (c) 2016-2022 University of Cambridge -+ New API code Copyright (c) 2016-2024 University of Cambridge - - ----------------------------------------------------------------------------- - Redistribution and use in source and binary forms, with or without -@@ -693,7 +693,6 @@ for (;;) - int i, j; - int clen, dlen; - uint32_t c, d; -- int forced_fail = 0; - BOOL partial_newline = FALSE; - BOOL could_continue = reset_could_continue; - reset_could_continue = FALSE; -@@ -2765,7 +2764,6 @@ for (;;) - though the other "backtracking verbs" are not supported. */ - - case OP_FAIL: -- forced_fail++; /* Count FAILs for multiple states */ - break; - - case OP_ASSERT: -@@ -3247,18 +3245,12 @@ for (;;) - matches that we are going to find. If partial matching has been requested, - check for appropriate conditions. - -- The "forced_ fail" variable counts the number of (*F) encountered for the -- character. If it is equal to the original active_count (saved in -- workspace[1]) it means that (*F) was found on every active state. In this -- case we don't want to give a partial match. -- - The "could_continue" variable is true if a state could have continued but - for the fact that the end of the subject was reached. */ - - if (new_count <= 0) - { - if (could_continue && /* Some could go on, and */ -- forced_fail != workspace[1] && /* Not all forced fail & */ - ( /* either... */ - (mb->moptions & PCRE2_PARTIAL_HARD) != 0 /* Hard partial */ - || /* or... */ -diff --git a/testdata/testinput6 b/testdata/testinput6 -index 0ca0d23..b71a69c 100644 ---- a/testdata/testinput6 -+++ b/testdata/testinput6 -@@ -4392,9 +4392,18 @@ - - /Z(*F)Q|ZXY/ - Z\=ps -+ XY\=dfa_restart - \= Expect no match - ZA\=ps - X\=ps -+ -+/Z(?:(*F)Q|XY)/ -+ Z\=ps -+ XY\=dfa_restart -+ -+/Z(*F)Q|Z(*F)XY/ -+\= Expect no match -+ Z\=ps - - /\bthe cat\b/ - the cat\=ps -diff --git a/testdata/testoutput6 b/testdata/testoutput6 -index 607b572..38c653e 100644 ---- a/testdata/testoutput6 -+++ b/testdata/testoutput6 -@@ -6769,11 +6769,24 @@ Partial match: dogs - /Z(*F)Q|ZXY/ - Z\=ps - Partial match: Z -+ XY\=dfa_restart -+ 0: XY - \= Expect no match - ZA\=ps - No match - X\=ps - No match -+ -+/Z(?:(*F)Q|XY)/ -+ Z\=ps -+Partial match: Z -+ XY\=dfa_restart -+ 0: XY -+ -+/Z(*F)Q|Z(*F)XY/ -+\= Expect no match -+ Z\=ps -+No match - - /\bthe cat\b/ - the cat\=ps --- -2.43.0 - diff --git a/backport-Sanity-checks-for-ctype-functions-342.patch b/backport-Sanity-checks-for-ctype-functions-342.patch deleted file mode 100644 index 5784f769c0fd9c97591175252a8970a80d7baadd..0000000000000000000000000000000000000000 --- a/backport-Sanity-checks-for-ctype-functions-342.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 9783ca9bed0cfb682e7bc76ed605aeb38571930a Mon Sep 17 00:00:00 2001 -From: Addison Crump -Date: Sat, 18 Nov 2023 16:52:00 +0100 -Subject: [PATCH] Sanity checks for ctype functions (#342) - -* fixup: sanity checks for ctype functions - -* format - -* more grep fixes - -* don't check if constrained by type ---- - src/pcre2_compile.c | 4 ++++ - src/pcre2_convert.c | 8 ++++++++ - src/pcre2grep.c | 10 +++++----- - 3 files changed, 17 insertions(+), 5 deletions(-) - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index feb5bcd..b3e4969 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -2194,7 +2194,11 @@ if (c == CHAR_LEFT_CURLY_BRACKET) - { - if (ptr >= cb->end_pattern) goto ERROR_RETURN; - c = *ptr++; -+#if PCRE2_CODE_UNIT_WIDTH != 8 -+ while (c == '_' || c == '-' || (c <= 0xff && isspace(c))) -+#else - while (c == '_' || c == '-' || isspace(c)) -+#endif - { - if (ptr >= cb->end_pattern) goto ERROR_RETURN; - c = *ptr++; -diff --git a/src/pcre2_convert.c b/src/pcre2_convert.c -index 36466e4..fe396ae 100644 ---- a/src/pcre2_convert.c -+++ b/src/pcre2_convert.c -@@ -540,6 +540,14 @@ Returns: !0 => character is found in the class - static BOOL - convert_glob_char_in_class(int class_index, PCRE2_UCHAR c) - { -+#if PCRE2_CODE_UNIT_WIDTH != 8 -+if (c > 0xff) -+ { -+ /* ctype functions are not sane for c > 0xff */ -+ return 0; -+ } -+#endif -+ - switch (class_index) - { - case 1: return isalnum(c); -diff --git a/src/pcre2grep.c b/src/pcre2grep.c -index 776aa28..73cf45a 100644 ---- a/src/pcre2grep.c -+++ b/src/pcre2grep.c -@@ -796,7 +796,7 @@ decode_ANSI_colour(const char *cs) - WORD result = csbi.wAttributes; - while (*cs) - { -- if (isdigit(*cs)) -+ if (isdigit((unsigned char)(*cs))) - { - int code = atoi(cs); - if (code == 1) result |= 0x08; -@@ -810,7 +810,7 @@ while (*cs) - else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08; - else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80; - -- while (isdigit(*cs)) cs++; -+ while (isdigit((unsigned char)(*cs))) cs++; - } - if (*cs) cs++; - } -@@ -1989,7 +1989,7 @@ switch (*(++string)) - case '{': - brace = TRUE; - string++; -- if (!isdigit(*string)) /* Syntax error: a decimal number required. */ -+ if (!isdigit((unsigned char)(*string))) /* Syntax error: a decimal number required. */ - { - if (!callout) - fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n", -@@ -4036,7 +4036,7 @@ for (i = 1; i < argc; i++) - - if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS) - { -- if (isdigit((unsigned char)s[1])) break; -+ if (isdigit((unsigned char)(s[1]))) break; - } - else /* Check for an option with data */ - { -@@ -4520,7 +4520,7 @@ for (fn = file_lists; fn != NULL; fn = fn->next) - { - int frc; - char *end = buffer + (int)strlen(buffer); -- while (end > buffer && isspace(end[-1])) end--; -+ while (end > buffer && isspace((unsigned char)(end[-1]))) end--; - *end = 0; - if (*buffer != 0) - { --- -2.33.0 - diff --git a/backport-avoid-inconsistency-between-d-and-digit-when-using-a.patch b/backport-avoid-inconsistency-between-d-and-digit-when-using-a.patch deleted file mode 100644 index c0c196c948c38cbc353b04b440c0d3291ca386cc..0000000000000000000000000000000000000000 --- a/backport-avoid-inconsistency-between-d-and-digit-when-using-a.patch +++ /dev/null @@ -1,270 +0,0 @@ -From 64549346f044dec18d18d06c2d08a68a68e26817 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= -Date: Sun, 9 Apr 2023 04:29:46 -0700 -Subject: [PATCH] avoid inconsistency between \d and [:digit:] when using /a - (#223) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Conflict:don't modify Changelog and doc/*; keep pcre2.h.generic consistent -with pcre2.h.in according to 1de7291 -Reference:https://github.com/PCRE2Project/pcre2/commit/64549346f044dec18d18d06c2d08a68a68e26817 - -Since a608946 (Additional PCRE2_EXTRA_ASCII_xxx code, 2023-02-01) -PCRE2_EXTRA_ASCII_BSD could be used to restrict \d to ASCII causing -the following inconsistent behaviour in UCP mode. - - PCRE2 version 10.43-DEV 2023-01-15 - re> /\d/utf,ucp,ascii_bsd - data> Ù£ - No match - data> - re> /[[:digit:]]/utf,ucp,ascii_bsd - data> Ù£ - 0: \x{663} - -It has been suggested[1] that the change to match \p{Nd} when Unicode -is enabled for [:digit:] might had been unintentional and a bug, as -[:digit:] should be able to be POSIX compatible, so add a new flag -PCRE2_EXTRA_ASCII_DIGIT to avoid changing its definition in UCP mode. - -[1] https://lore.kernel.org/git/CANgJU+U+xXsh9psd0z5Xjr+Se5QgdKkjQ7LUQ-PdUULSN3n4+g@mail.gmail.com/ ---- - src/pcre2.h.generic | 6 ++++++ - src/pcre2.h.in | 1 + - src/pcre2_compile.c | 6 ++++-- - src/pcre2test.c | 4 +++- - testdata/testinput5 | 10 +++++++++- - testdata/testinput7 | 10 ++++++++-- - testdata/testoutput5 | 19 ++++++++++++++++++- - testdata/testoutput7 | 13 +++++++++++-- - 8 files changed, 60 insertions(+), 9 deletions(-) - -diff --git a/src/pcre2.h.generic b/src/pcre2.h.generic -index dad774ce..05cf9bc1 100644 ---- a/src/pcre2.h.generic -+++ b/src/pcre2.h.generic -@@ -153,6 +153,12 @@ D is inspected during pcre2_dfa_match() execution - #define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */ - #define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */ - #define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK 0x00000040u /* C */ -+#define PCRE2_EXTRA_CASELESS_RESTRICT 0x00000080u /* C */ -+#define PCRE2_EXTRA_ASCII_BSD 0x00000100u /* C */ -+#define PCRE2_EXTRA_ASCII_BSS 0x00000200u /* C */ -+#define PCRE2_EXTRA_ASCII_BSW 0x00000400u /* C */ -+#define PCRE2_EXTRA_ASCII_POSIX 0x00000800u /* C */ -+#define PCRE2_EXTRA_ASCII_DIGIT 0x00001000u /* C */ - - /* These are for pcre2_jit_compile(). */ - -diff --git a/src/pcre2.h.in b/src/pcre2.h.in -index 7202c633..cd7fdcf2 100644 ---- a/src/pcre2.h.in -+++ b/src/pcre2.h.in -@@ -158,6 +158,7 @@ D is inspected during pcre2_dfa_match() execution - #define PCRE2_EXTRA_ASCII_BSS 0x00000200u /* C */ - #define PCRE2_EXTRA_ASCII_BSW 0x00000400u /* C */ - #define PCRE2_EXTRA_ASCII_POSIX 0x00000800u /* C */ -+#define PCRE2_EXTRA_ASCII_DIGIT 0x00001000u /* C */ - - /* These are for pcre2_jit_compile(). */ - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index 95c4a79d..634360b7 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -786,7 +786,8 @@ are allowed. */ - PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES|PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL| \ - PCRE2_EXTRA_ESCAPED_CR_IS_LF|PCRE2_EXTRA_ALT_BSUX| \ - PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK|PCRE2_EXTRA_ASCII_BSD| \ -- PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX) -+ PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX| \ -+ PCRE2_EXTRA_ASCII_DIGIT) - - /* Compile time error code numbers. They are given names so that they can more - easily be tracked. When a new number is added, the tables called eint1 and -@@ -3581,7 +3582,8 @@ while (ptr < ptrend) - - #ifdef SUPPORT_UNICODE - if ((options & PCRE2_UCP) != 0 && -- (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0) -+ (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0 && -+ !(posix_class == 7 && (xoptions & PCRE2_EXTRA_ASCII_DIGIT) != 0)) - { - int ptype = posix_substitutes[2*posix_class]; - int pvalue = posix_substitutes[2*posix_class + 1]; -diff --git a/src/pcre2test.c b/src/pcre2test.c -index 4da3ef90..21b19370 100644 ---- a/src/pcre2test.c -+++ b/src/pcre2test.c -@@ -651,6 +651,7 @@ static modstruct modlist[] = { - { "ascii_bsd", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSD, CO(extra_options) }, - { "ascii_bss", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSS, CO(extra_options) }, - { "ascii_bsw", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSW, CO(extra_options) }, -+ { "ascii_digit", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_DIGIT, CO(extra_options) }, - { "ascii_posix", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_POSIX, CO(extra_options) }, - { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) }, - { "bad_escape_is_literal", MOD_CTC, MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) }, -@@ -4294,13 +4295,14 @@ show_compile_extra_options(uint32_t options, const char *before, - const char *after) - { - if (options == 0) fprintf(outfile, "%s %s", before, after); --else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s", -+else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s", - before, - ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "", - ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " alt_bsux" : "", - ((options & PCRE2_EXTRA_ASCII_BSD) != 0)? " ascii_bsd" : "", - ((options & PCRE2_EXTRA_ASCII_BSS) != 0)? " ascii_bss" : "", - ((options & PCRE2_EXTRA_ASCII_BSW) != 0)? " ascii_bsw" : "", -+ ((options & PCRE2_EXTRA_ASCII_DIGIT) != 0)? " ascii_digit" : "", - ((options & PCRE2_EXTRA_ASCII_POSIX) != 0)? " ascii_posix" : "", - ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "", - ((options & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)? " caseless_restrict" : "", -diff --git a/testdata/testinput5 b/testdata/testinput5 -index 0f105408..0624a0c3 100644 ---- a/testdata/testinput5 -+++ b/testdata/testinput5 -@@ -1215,6 +1215,8 @@ - - /[[:digit:]]/B,ucp - -+/[[:digit:]]/B,ucp,ascii_digit -+ - /[[:graph:]]/B,ucp - - /[[:print:]]/B,ucp -@@ -1227,7 +1229,7 @@ - - /[[:xdigit:]]/B,ucp - --# Unicode properties for \b abd \B -+# Unicode properties for \b and \B - - /\b...\B/utf,ucp - abc_ -@@ -2431,6 +2433,12 @@ - /[[:digit:]]+/utf,ucp - 123\x{660}456 - -+/[[:digit:]]+/utf,ucp,ascii_digit -+ 123\x{660}456 -+ -+/[[:digit:]]+/g,utf,ucp,ascii_digit -+ 123\x{660}456 -+ - /[[:digit:]]+/utf,ucp,ascii_posix - 123\x{660}456 - -diff --git a/testdata/testinput7 b/testdata/testinput7 -index a2b7fb8d..96deaa30 100644 ---- a/testdata/testinput7 -+++ b/testdata/testinput7 -@@ -1657,7 +1657,7 @@ - /^[\p{Xwd}]+/utf - ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - --# Unicode properties for \b abd \B -+# Unicode properties for \b and \B - - /\b...\B/utf,ucp - abc_ -@@ -2435,9 +2435,15 @@ - /[[:digit:]]+/utf,ucp - 123\x{660}456 - -+/[[:digit:]]+/utf,ucp,ascii_digit -+ 123\x{660}456 -+ -+/[[:digit:]]+/g,utf,ucp,ascii_digit -+ 123\x{660}456 -+ - /[[:digit:]]+/utf,ucp,ascii_posix - 123\x{660}456 -- -+ - />[[:space:]]+\x{a0} \x{a0}< - >\x{a0}\x{a0}\x{a0}< -diff --git a/testdata/testoutput5 b/testdata/testoutput5 -index 3cee990e..febcc954 100644 ---- a/testdata/testoutput5 -+++ b/testdata/testoutput5 -@@ -2520,6 +2520,14 @@ No match - End - ------------------------------------------------------------------ - -+/[[:digit:]]/B,ucp,ascii_digit -+------------------------------------------------------------------ -+ Bra -+ [0-9] -+ Ket -+ End -+------------------------------------------------------------------ -+ - /[[:graph:]]/B,ucp - ------------------------------------------------------------------ - Bra -@@ -2568,7 +2576,7 @@ No match - End - ------------------------------------------------------------------ - --# Unicode properties for \b abd \B -+# Unicode properties for \b and \B - - /\b...\B/utf,ucp - abc_ -@@ -5359,6 +5367,15 @@ No match - 123\x{660}456 - 0: 123\x{660}456 - -+/[[:digit:]]+/utf,ucp,ascii_digit -+ 123\x{660}456 -+ 0: 123 -+ -+/[[:digit:]]+/g,utf,ucp,ascii_digit -+ 123\x{660}456 -+ 0: 123 -+ 0: 456 -+ - /[[:digit:]]+/utf,ucp,ascii_posix - 123\x{660}456 - 0: 123 -diff --git a/testdata/testoutput7 b/testdata/testoutput7 -index 4065981d..d98178e6 100644 ---- a/testdata/testoutput7 -+++ b/testdata/testoutput7 -@@ -2853,7 +2853,7 @@ No match - ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - --# Unicode properties for \b abd \B -+# Unicode properties for \b and \B - - /\b...\B/utf,ucp - abc_ -@@ -4080,10 +4080,19 @@ No match - 123\x{660}456 - 0: 123\x{660}456 - -+/[[:digit:]]+/utf,ucp,ascii_digit -+ 123\x{660}456 -+ 0: 123 -+ -+/[[:digit:]]+/g,utf,ucp,ascii_digit -+ 123\x{660}456 -+ 0: 123 -+ 0: 456 -+ - /[[:digit:]]+/utf,ucp,ascii_posix - 123\x{660}456 - 0: 123 -- -+ - />[[:space:]]+\x{a0} \x{a0}< - 0: >\x{a0} \x{a0}< --- -2.33.0 - diff --git a/backport-fix-a-possible-integer-overflow-in-DFA-matching-305.patch b/backport-fix-a-possible-integer-overflow-in-DFA-matching-305.patch deleted file mode 100644 index 0441a0f022b03ff6d025a7b96204c90977a280e3..0000000000000000000000000000000000000000 --- a/backport-fix-a-possible-integer-overflow-in-DFA-matching-305.patch +++ /dev/null @@ -1,24 +0,0 @@ -From d231944236c6516de2831cbdde3069dab180ae81 Mon Sep 17 00:00:00 2001 -From: pkuzco -Date: Mon, 9 Oct 2023 17:46:42 +0200 -Subject: [PATCH] fix a possible integer overflow in DFA matching (#305) - ---- - src/pcre2_dfa_match.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c -index 518ac07..13b1ae4 100644 ---- a/src/pcre2_dfa_match.c -+++ b/src/pcre2_dfa_match.c -@@ -428,7 +428,7 @@ overflow. */ - - else - { -- uint32_t newsize = (rws->size >= UINT32_MAX/2)? UINT32_MAX/2 : rws->size * 2; -+ uint32_t newsize = (rws->size >= UINT32_MAX/(sizeof(int)*2))? UINT32_MAX/sizeof(int) : rws->size * 2; - uint32_t newsizeK = newsize/(1024/sizeof(int)); - - if (newsizeK + mb->heap_used > mb->heap_limit) --- -2.33.0 \ No newline at end of file diff --git a/backport-fix-wrong-test.patch b/backport-fix-wrong-test.patch deleted file mode 100644 index 14da368894e1db295fdd8aa40dd91929298c66ee..0000000000000000000000000000000000000000 --- a/backport-fix-wrong-test.patch +++ /dev/null @@ -1,24 +0,0 @@ -From a8558f252e8d8b3fd52d1dc8ea46ecefd991a187 Mon Sep 17 00:00:00 2001 -From: Zoltan Herczeg -Date: Tue, 11 Apr 2023 12:55:01 +0000 -Subject: [PATCH] Fix wrong test - -Conflict:NA -Reference:https://github.com/PCRE2Project/pcre2/commit/a8558f252e8d8b3fd52d1dc8ea46ecefd991a187 ---- - src/pcre2_jit_test.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c -index 7a83bc02..331ed6d4 100644 ---- a/src/pcre2_jit_test.c -+++ b/src/pcre2_jit_test.c -@@ -1981,7 +1981,7 @@ static const struct invalid_utf8_regression_test_case invalid_utf8_regression_te - { PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" }, - - { PCRE2_UTF | PCRE2_UCP, CI, 0, 0, 0, -1, -1, { "[\\s]", NULL }, "\xed\xa0\x80" }, -- { PCRE2_UTF, CI, 0, 0, 0, 1, 4, { "[\\D]", NULL }, "@\xe0\xab\xaa@" }, -+ { PCRE2_UTF, CI, 0, 0, 0, 0, 3, { "[\\D]", NULL }, "\xe0\xab\xaa@" }, - - /* These two are not invalid UTF tests, but this infrastructure fits better for them. */ - { 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" }, diff --git a/backport-minor-tweaks-to-valid_utf-to-avoid-truncation-of-err.patch b/backport-minor-tweaks-to-valid_utf-to-avoid-truncation-of-err.patch deleted file mode 100644 index f6f3e8e70e70030e8ab729c7f2279513c8ee0efd..0000000000000000000000000000000000000000 --- a/backport-minor-tweaks-to-valid_utf-to-avoid-truncation-of-err.patch +++ /dev/null @@ -1,198 +0,0 @@ -From fdd3ce7e2ad51f38d7c7a47c92f2aa46b290a0f7 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= -Date: Tue, 17 Jan 2023 06:43:20 -0800 -Subject: [PATCH] minor tweaks to valid_utf() to avoid truncation of error - offset (#181) - -e8cdae3 (Correct an incorrect cast., 2017-04-14) started changing -some of the casts to fit the type of the error offset variable, so -complete that, and for consistency, add the same type of casts to -the non UTF-8 code. ---- - src/pcre2_valid_utf.c | 48 +++++++++++++++++++++---------------------- - 1 file changed, 24 insertions(+), 24 deletions(-) - -diff --git a/src/pcre2_valid_utf.c b/src/pcre2_valid_utf.c -index e47ea78f..de411b91 100644 ---- a/src/pcre2_valid_utf.c -+++ b/src/pcre2_valid_utf.c -@@ -171,7 +171,7 @@ for (p = string; length > 0; p++) - - if (((d = *(++p)) & 0xc0) != 0x80) - { -- *erroroffset = (int)(p - string) - 1; -+ *erroroffset = (PCRE2_SIZE)(p - string) - 1; - return PCRE2_ERROR_UTF8_ERR6; - } - -@@ -186,7 +186,7 @@ for (p = string; length > 0; p++) - - case 1: if ((c & 0x3e) == 0) - { -- *erroroffset = (int)(p - string) - 1; -+ *erroroffset = (PCRE2_SIZE)(p - string) - 1; - return PCRE2_ERROR_UTF8_ERR15; - } - break; -@@ -198,17 +198,17 @@ for (p = string; length > 0; p++) - case 2: - if ((*(++p) & 0xc0) != 0x80) /* Third byte */ - { -- *erroroffset = (int)(p - string) - 2; -+ *erroroffset = (PCRE2_SIZE)(p - string) - 2; - return PCRE2_ERROR_UTF8_ERR7; - } - if (c == 0xe0 && (d & 0x20) == 0) - { -- *erroroffset = (int)(p - string) - 2; -+ *erroroffset = (PCRE2_SIZE)(p - string) - 2; - return PCRE2_ERROR_UTF8_ERR16; - } - if (c == 0xed && d >= 0xa0) - { -- *erroroffset = (int)(p - string) - 2; -+ *erroroffset = (PCRE2_SIZE)(p - string) - 2; - return PCRE2_ERROR_UTF8_ERR14; - } - break; -@@ -220,22 +220,22 @@ for (p = string; length > 0; p++) - case 3: - if ((*(++p) & 0xc0) != 0x80) /* Third byte */ - { -- *erroroffset = (int)(p - string) - 2; -+ *erroroffset = (PCRE2_SIZE)(p - string) - 2; - return PCRE2_ERROR_UTF8_ERR7; - } - if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */ - { -- *erroroffset = (int)(p - string) - 3; -+ *erroroffset = (PCRE2_SIZE)(p - string) - 3; - return PCRE2_ERROR_UTF8_ERR8; - } - if (c == 0xf0 && (d & 0x30) == 0) - { -- *erroroffset = (int)(p - string) - 3; -+ *erroroffset = (PCRE2_SIZE)(p - string) - 3; - return PCRE2_ERROR_UTF8_ERR17; - } - if (c > 0xf4 || (c == 0xf4 && d > 0x8f)) - { -- *erroroffset = (int)(p - string) - 3; -+ *erroroffset = (PCRE2_SIZE)(p - string) - 3; - return PCRE2_ERROR_UTF8_ERR13; - } - break; -@@ -251,22 +251,22 @@ for (p = string; length > 0; p++) - case 4: - if ((*(++p) & 0xc0) != 0x80) /* Third byte */ - { -- *erroroffset = (int)(p - string) - 2; -+ *erroroffset = (PCRE2_SIZE)(p - string) - 2; - return PCRE2_ERROR_UTF8_ERR7; - } - if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */ - { -- *erroroffset = (int)(p - string) - 3; -+ *erroroffset = (PCRE2_SIZE)(p - string) - 3; - return PCRE2_ERROR_UTF8_ERR8; - } - if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */ - { -- *erroroffset = (int)(p - string) - 4; -+ *erroroffset = (PCRE2_SIZE)(p - string) - 4; - return PCRE2_ERROR_UTF8_ERR9; - } - if (c == 0xf8 && (d & 0x38) == 0) - { -- *erroroffset = (int)(p - string) - 4; -+ *erroroffset = (PCRE2_SIZE)(p - string) - 4; - return PCRE2_ERROR_UTF8_ERR18; - } - break; -@@ -277,27 +277,27 @@ for (p = string; length > 0; p++) - case 5: - if ((*(++p) & 0xc0) != 0x80) /* Third byte */ - { -- *erroroffset = (int)(p - string) - 2; -+ *erroroffset = (PCRE2_SIZE)(p - string) - 2; - return PCRE2_ERROR_UTF8_ERR7; - } - if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */ - { -- *erroroffset = (int)(p - string) - 3; -+ *erroroffset = (PCRE2_SIZE)(p - string) - 3; - return PCRE2_ERROR_UTF8_ERR8; - } - if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */ - { -- *erroroffset = (int)(p - string) - 4; -+ *erroroffset = (PCRE2_SIZE)(p - string) - 4; - return PCRE2_ERROR_UTF8_ERR9; - } - if ((*(++p) & 0xc0) != 0x80) /* Sixth byte */ - { -- *erroroffset = (int)(p - string) - 5; -+ *erroroffset = (PCRE2_SIZE)(p - string) - 5; - return PCRE2_ERROR_UTF8_ERR10; - } - if (c == 0xfc && (d & 0x3c) == 0) - { -- *erroroffset = (int)(p - string) - 5; -+ *erroroffset = (PCRE2_SIZE)(p - string) - 5; - return PCRE2_ERROR_UTF8_ERR19; - } - break; -@@ -309,7 +309,7 @@ for (p = string; length > 0; p++) - - if (ab > 3) - { -- *erroroffset = (int)(p - string) - ab; -+ *erroroffset = (PCRE2_SIZE)(p - string) - ab; - return (ab == 4)? PCRE2_ERROR_UTF8_ERR11 : PCRE2_ERROR_UTF8_ERR12; - } - } -@@ -340,21 +340,21 @@ for (p = string; length > 0; p++) - /* High surrogate. Must be a followed by a low surrogate. */ - if (length == 0) - { -- *erroroffset = p - string; -+ *erroroffset = (PCRE2_SIZE)(p - string); - return PCRE2_ERROR_UTF16_ERR1; - } - p++; - length--; - if ((*p & 0xfc00) != 0xdc00) - { -- *erroroffset = p - string - 1; -+ *erroroffset = (PCRE2_SIZE)(p - string) - 1; - return PCRE2_ERROR_UTF16_ERR2; - } - } - else - { - /* Isolated low surrogate. Always an error. */ -- *erroroffset = p - string; -+ *erroroffset = (PCRE2_SIZE)(p - string); - return PCRE2_ERROR_UTF16_ERR3; - } - } -@@ -379,14 +379,14 @@ for (p = string; length > 0; length--, p++) - /* Normal UTF-32 code point. Neither high nor low surrogate. */ - if (c > 0x10ffffu) - { -- *erroroffset = p - string; -+ *erroroffset = (PCRE2_SIZE)(p - string); - return PCRE2_ERROR_UTF32_ERR2; - } - } - else - { - /* A surrogate */ -- *erroroffset = p - string; -+ *erroroffset = (PCRE2_SIZE)(p - string); - return PCRE2_ERROR_UTF32_ERR1; - } - } --- -2.43.0 - diff --git a/backport-no-partial-match-if-trailing-data-is-invalid-utf-238.patch b/backport-no-partial-match-if-trailing-data-is-invalid-utf-238.patch deleted file mode 100644 index cb50b466085479bcfb1453a07a0784aef06a3af3..0000000000000000000000000000000000000000 --- a/backport-no-partial-match-if-trailing-data-is-invalid-utf-238.patch +++ /dev/null @@ -1,205 +0,0 @@ -From 1bb1ef67eff000fe4aa8c1c45289938188e11e48 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= -Date: Fri, 21 Apr 2023 01:12:45 -0700 -Subject: [PATCH] no partial match if trailing data is invalid utf (#238) - -Avoid returning a partial match if one was found but followed by invalid -UTF, making the result consistent with JIT and unlike: - - PCRE2 version 10.34 2019-11-21 - re> /.a/match_invalid_utf,allvector,jit - data> b\xb1\=ph,ovector=1 - No match - 0: - data> b\xb1\=ph,ovector=1,no_jit - Partial match: b\x{b1} - ** ovector[1] is not equal to the subject length: 1 != 2 - 0: 0 1 - -Conflict:NA -Reference:https://github.com/PCRE2Project/pcre2/commit/1bb1ef67eff000fe4aa8c1c45289938188e11e48 ---- - src/pcre2_match.c | 1 + - testdata/testinput10 | 19 +++++++++++++++++++ - testdata/testinput12 | 14 ++++++++++++++ - testdata/testoutput10 | 32 ++++++++++++++++++++++++++++++++ - testdata/testoutput12-16 | 22 ++++++++++++++++++++++ - testdata/testoutput12-32 | 22 ++++++++++++++++++++++ - 6 files changed, 110 insertions(+) - -diff --git a/src/pcre2_match.c b/src/pcre2_match.c -index ea98af3c..afe3036b 100644 ---- a/src/pcre2_match.c -+++ b/src/pcre2_match.c -@@ -7454,6 +7454,7 @@ if (utf && end_subject != true_end_subject && - if (start_match >= true_end_subject) - { - rc = MATCH_NOMATCH; /* In case it was partial */ -+ match_partial = NULL; - break; - } - -diff --git a/testdata/testinput10 b/testdata/testinput10 -index 53e37cbc..27321e37 100644 ---- a/testdata/testinput10 -+++ b/testdata/testinput10 -@@ -506,6 +506,25 @@ - \= Expect no match - ab\x80cdef\=ph - -+/.a/match_invalid_utf -+ ab\=ph -+ ab\=ps -+ b\xf0\x91\x88b\=ph -+ b\xf0\x91\x88b\=ps -+ b\xf0\x91\x88\xb4a -+\= Expect no match -+ b\x80\=ph -+ b\x80\=ps -+ b\xf0\x91\x88\=ph -+ b\xf0\x91\x88\=ps -+ -+/.a$/match_invalid_utf -+ ab\=ph -+ ab\=ps -+\= Expect no match -+ b\xf0\x91\x98\=ph -+ b\xf0\x91\x98\=ps -+ - /ab$/match_invalid_utf - ab\x80cdeab - \= Expect no match -diff --git a/testdata/testinput12 b/testdata/testinput12 -index 9b4f8d34..7a85eb57 100644 ---- a/testdata/testinput12 -+++ b/testdata/testinput12 -@@ -413,6 +413,20 @@ - \= Expect no match - ab\x{df00}cdef\=ph - -+/.a/match_invalid_utf -+ ab\=ph -+ ab\=ps -+\= Expect no match -+ b\x{df00}\=ph -+ b\x{df00}\=ps -+ -+/.a$/match_invalid_utf -+ ab\=ph -+ ab\=ps -+\= Expect no match -+ b\x{df00}\=ph -+ b\x{df00}\=ps -+ - /ab$/match_invalid_utf - ab\x{df00}cdeab - \= Expect no match -diff --git a/testdata/testoutput10 b/testdata/testoutput10 -index d4085106..1f4c876b 100644 ---- a/testdata/testoutput10 -+++ b/testdata/testoutput10 -@@ -1646,6 +1646,38 @@ Partial match: ab - ab\x80cdef\=ph - No match - -+/.a/match_invalid_utf -+ ab\=ph -+Partial match: b -+ ab\=ps -+Partial match: b -+ b\xf0\x91\x88b\=ph -+Partial match: b -+ b\xf0\x91\x88b\=ps -+Partial match: b -+ b\xf0\x91\x88\xb4a -+ 0: \x{11234}a -+\= Expect no match -+ b\x80\=ph -+No match -+ b\x80\=ps -+No match -+ b\xf0\x91\x88\=ph -+No match -+ b\xf0\x91\x88\=ps -+No match -+ -+/.a$/match_invalid_utf -+ ab\=ph -+Partial match: b -+ ab\=ps -+Partial match: b -+\= Expect no match -+ b\xf0\x91\x98\=ph -+No match -+ b\xf0\x91\x98\=ps -+No match -+ - /ab$/match_invalid_utf - ab\x80cdeab - 0: ab -diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 -index 84c48581..98676324 100644 ---- a/testdata/testoutput12-16 -+++ b/testdata/testoutput12-16 -@@ -1522,6 +1522,28 @@ Partial match: ab - ab\x{df00}cdef\=ph - No match - -+/.a/match_invalid_utf -+ ab\=ph -+Partial match: b -+ ab\=ps -+Partial match: b -+\= Expect no match -+ b\x{df00}\=ph -+No match -+ b\x{df00}\=ps -+No match -+ -+/.a$/match_invalid_utf -+ ab\=ph -+Partial match: b -+ ab\=ps -+Partial match: b -+\= Expect no match -+ b\x{df00}\=ph -+No match -+ b\x{df00}\=ps -+No match -+ - /ab$/match_invalid_utf - ab\x{df00}cdeab - 0: ab -diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 -index 03b6e394..3a20dd4b 100644 ---- a/testdata/testoutput12-32 -+++ b/testdata/testoutput12-32 -@@ -1520,6 +1520,28 @@ Partial match: ab - ab\x{df00}cdef\=ph - No match - -+/.a/match_invalid_utf -+ ab\=ph -+Partial match: b -+ ab\=ps -+Partial match: b -+\= Expect no match -+ b\x{df00}\=ph -+No match -+ b\x{df00}\=ps -+No match -+ -+/.a$/match_invalid_utf -+ ab\=ph -+Partial match: b -+ ab\=ps -+Partial match: b -+\= Expect no match -+ b\x{df00}\=ph -+No match -+ b\x{df00}\=ps -+No match -+ - /ab$/match_invalid_utf - ab\x{df00}cdeab - 0: ab diff --git a/backport-pcre2_compile-avoid-1-byte-buffer-overread-parsing-V.patch b/backport-pcre2_compile-avoid-1-byte-buffer-overread-parsing-V.patch deleted file mode 100644 index 9d56bd5021bdc2083fe28b7d80225abac0299c3d..0000000000000000000000000000000000000000 --- a/backport-pcre2_compile-avoid-1-byte-buffer-overread-parsing-V.patch +++ /dev/null @@ -1,118 +0,0 @@ -From bc367f1880ae5ccc771d5780e35df4c42744a9c4 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= -Date: Sun, 22 Sep 2024 01:49:03 -0700 -Subject: [PATCH] pcre2_compile: avoid 1 byte buffer overread parsing VERBs - (#487) - -As reported recently by ef218fb (Guard against out-of-bounds memory -access when parsing LIMIT_HEAP et al (#463), 2024-09-07), a malformed -pattern could result in reading 1 byte past its end. - -Fix a similar issue that affects all VERBs and add test cases to -ensure the original bug and all its siblings are no longer an issue. - -While at it fix the wording of the related documentation. ---- - doc/pcre2syntax.3 | 4 ++-- - src/pcre2_compile.c | 11 +++-------- - testdata/testinput2 | 8 ++++++++ - testdata/testoutput2 | 12 ++++++++++++ - 4 files changed, 25 insertions(+), 10 deletions(-) - -diff --git a/doc/pcre2syntax.3 b/doc/pcre2syntax.3 -index 232125b82..db0bb6586 100644 ---- a/doc/pcre2syntax.3 -+++ b/doc/pcre2syntax.3 -@@ -408,8 +408,8 @@ only one hyphen. Setting (but no unsetting) is allowed after (?^ for example - example (?i:...). - .P - The following are recognized only at the very start of a pattern or after one --of the newline or \eR options with similar syntax. More than one of them may --appear. For the first three, d is a decimal number. -+of the newline or \eR sequences or options with similar syntax. More than one -+of them may appear. For the first three, d is a decimal number. - .sp - (*LIMIT_DEPTH=d) set the backtracking limit to d - (*LIMIT_HEAP=d) set the heap size limit to d * 1024 bytes -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index 7e48b26..3d9a500 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -9877,13 +9877,14 @@ if ((options & PCRE2_LITERAL) == 0) - { - for (i = 0; i < sizeof(pso_list)/sizeof(pso); i++) - { -- uint32_t c, pp; - pso *p = pso_list + i; - - if (patlen - skipatstart - 2 >= p->length && - PRIV(strncmp_c8)(ptr + skipatstart + 2, (char *)(p->name), - p->length) == 0) - { -+ uint32_t c, pp; -+ - skipatstart += p->length + 2; - switch(p->type) - { -@@ -9910,18 +9911,12 @@ if ((options & PCRE2_LITERAL) == 0) - case PSO_LIMH: - c = 0; - pp = skipatstart; -- if (!IS_DIGIT(ptr[pp])) -- { -- errorcode = ERR60; -- ptr += pp; -- goto HAD_EARLY_ERROR; -- } - while (pp < patlen && IS_DIGIT(ptr[pp])) - { - if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */ - c = c*10 + (ptr[pp++] - CHAR_0); - } -- if (pp >= patlen || ptr[pp] != CHAR_RIGHT_PARENTHESIS) -+ if (pp >= patlen || pp == skipatstart || ptr[pp] != CHAR_RIGHT_PARENTHESIS) - { - errorcode = ERR60; - ptr += pp; -diff --git a/testdata/testinput2 b/testdata/testinput2 -index a869c5bc2..542d14520 100644 ---- a/testdata/testinput2 -+++ b/testdata/testinput2 -@@ -5261,6 +5261,14 @@ a)"xI - - /(*LIMIT_HEAP=0)xxx/I - -+/(*LIMIT_HEAP=123/use_length -+ -+/(*LIMIT_MATCH=/use_length -+ -+/(*CRLF)(*LIMIT_DEPTH=/use_length -+ -+/(*CRLF)(*LIMIT_RECURSION=1)(*BOGUS/use_length -+ - /\d{0,3}(*:abc)(?C1)xxx/callout_info - - # ---------------------------------------------------------------------- -diff --git a/testdata/testoutput2 b/testdata/testoutput2 -index bf7b7620e..b99d64781 100644 ---- a/testdata/testoutput2 -+++ b/testdata/testoutput2 -@@ -16220,6 +16220,18 @@ First code unit = 'x' - Last code unit = 'x' - Subject length lower bound = 3 - -+/(*LIMIT_HEAP=123/use_length -+Failed: error 160 at offset 16: (*VERB) not recognized or malformed -+ -+/(*LIMIT_MATCH=/use_length -+Failed: error 160 at offset 14: (*VERB) not recognized or malformed -+ -+/(*CRLF)(*LIMIT_DEPTH=/use_length -+Failed: error 160 at offset 21: (*VERB) not recognized or malformed -+ -+/(*CRLF)(*LIMIT_RECURSION=1)(*BOGUS/use_length -+Failed: error 160 at offset 34: (*VERB) not recognized or malformed -+ - /\d{0,3}(*:abc)(?C1)xxx/callout_info - Callout 1 x - diff --git a/backport-pcre2grep-document-better-possible-multiline-matchin.patch b/backport-pcre2grep-document-better-possible-multiline-matchin.patch deleted file mode 100644 index cb57e1631e1d8946d6aeac887da6e0d08f15e4a5..0000000000000000000000000000000000000000 --- a/backport-pcre2grep-document-better-possible-multiline-matchin.patch +++ /dev/null @@ -1,84 +0,0 @@ -From 1bc34ffa64c33381d793fb5cdddf3f484e603d23 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= -Date: Fri, 12 May 2023 07:54:02 -0700 -Subject: [PATCH] pcre2grep: document better possible multiline matching misses - (#252) - -While at it, remove a misplaced cast that would cause problems for -subjects over 2GB and a few typos. - -Conflict:don't modify ucptest.c because ucptest.c doesn't exist in current version. -Reference:https://github.com/PCRE2Project/pcre2/commit/1bc34ffa64c33381d793fb5cdddf3f484e603d23 ---- - doc/pcre2grep.1 | 11 ++++++++--- - src/pcre2_compile.c | 4 ++-- - src/pcre2grep.c | 2 +- - 3 files changed, 11 insertions(+), 6 deletions(-) - -diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1 -index 1081591..5077304 100644 ---- a/doc/pcre2grep.1 -+++ b/doc/pcre2grep.1 -@@ -66,6 +66,9 @@ The block of memory that is actually used is three times the "buffer size", to - allow for buffering "before" and "after" lines. If the buffer size is too - small, fewer than requested "before" and "after" lines may be output. - .P -+When matching with a multiline pattern, the size of the buffer must be at least -+half of the maximum match expected or the pattern might fail to match. -+.P - Patterns can be no longer than 8KiB or BUFSIZ bytes, whichever is the greater. - BUFSIZ is defined in \fB\fP. When there is more than one pattern - (specified by the use of \fB-e\fP and/or \fB-f\fP), each pattern is applied to -@@ -201,7 +204,7 @@ exactly the same as the number of lines that would have been output, but if the - \fB-M\fP (multiline) option is used (without \fB-v\fP), there may be more - suppressed lines than the count (that is, the number of matches). - .sp --If no lines are selected, the number zero is output. If several files are are -+If no lines are selected, the number zero is output. If several files are - being scanned, a count is output for each of them and the \fB-t\fP option can - be used to cause a total to be output at the end. However, if the - \fB--files-with-matches\fP option is also used, only those files whose counts -@@ -490,8 +493,10 @@ well as possibly handling a two-character newline sequence. - .sp - There is a limit to the number of lines that can be matched, imposed by the way - that \fBpcre2grep\fP buffers the input file as it scans it. With a sufficiently --large processing buffer, this should not be a problem, but the \fB-M\fP option --does not work when input is read line by line (see \fB--line-buffered\fP.) -+large processing buffer, this should not be a problem. -+.sp -+The \fB-M\fP option does not work when input is read line by line (see -+\fB--line-buffered\fP.) - .TP - \fB-m\fP \fInumber\fP, \fB--max-count\fP=\fInumber\fP - Stop processing after finding \fInumber\fP matching lines, or non-matching -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index b906dc0..c6d4c60 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -5549,8 +5549,8 @@ for (;; pptr++) - - If the class contains characters outside the 0-255 range, a different - opcode is compiled. It may optionally have a bit map for characters < 256, -- but those above are are explicitly listed afterwards. A flag code unit -- tells whether the bitmap is present, and whether this is a negated class or -+ but those above are explicitly listed afterwards. A flag code unit tells -+ whether the bitmap is present, and whether this is a negated class or - not. */ - - case META_CLASS_NOT: -diff --git a/src/pcre2grep.c b/src/pcre2grep.c -index 1484d67..d2ab620 100644 ---- a/src/pcre2grep.c -+++ b/src/pcre2grep.c -@@ -1856,7 +1856,7 @@ if (slen > 200) - - for (int i = 1; p != NULL; p = p->next, i++) - { -- int rc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length, -+ int rc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, length, - startoffset, options, match_data, match_context); - if (rc == PCRE2_ERROR_NOMATCH) continue; - --- -2.27.0 - diff --git a/pcre2-10.46.tar.bz2 b/pcre2-10.46.tar.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..86b09b6398853a8da2de86d6795653ac206a49cf Binary files /dev/null and b/pcre2-10.46.tar.bz2 differ diff --git a/pcre2.spec b/pcre2.spec index ab6aa88f57fc2a7812267e6c1f5ed9679b2f9e2a..87726ceed19c2d0722f7e0e831f5a3d14c88978e 100644 --- a/pcre2.spec +++ b/pcre2.spec @@ -1,6 +1,6 @@ Name: pcre2 -Version: 10.42 -Release: 14 +Version: 10.46 +Release: 1 Summary: Perl Compatible Regular Expressions License: BSD URL: http://www.pcre.org/ @@ -8,45 +8,6 @@ Source0: https://github.com/PCRE2Project/pcre2/releases/download/%{name}-%{v # Do no set RPATH if libdir is not /usr/lib Patch6000: backport-pcre2-10.10-Fix-multilib.patch -Patch6001: backport-no-partial-match-if-trailing-data-is-invalid-utf-238.patch -Patch6002: backport-Fix-an-invalid-match-of-ascii-word-classes-when-inva.patch -Patch6003: backport-fix-wrong-test.patch -Patch6004: sljit-sv48-sv57.patch -Patch6005: backport-fix-a-possible-integer-overflow-in-DFA-matching-305.patch - -Patch6006: backport-Fix-oversight-in-DFA-when-changing-OP_REVERSE-also-a.patch -Patch6007: backport-Fix-32-bit-quantifier-following-a-character-larger-t.patch -Patch6008: backport-Fix-z-behaviour-when-matching-within-invalid-UTF.patch -Patch6009: backport-Fix-incorrect-patch-in-c1306126.patch -Patch6010: backport-Fix-another-oversight-in-c1306126.patch -Patch6011: backport-Fix-X-matching-in-32-bit-mode-without-UTF-in-JIT.patch -Patch6012: backport-Fix-bad-patch-in-05206d66.-The-interpreter-was-handl.patch -Patch6013: backport-Fix-backref-iterators-when-PCRE2_MATCH_UNSET_BACKREF.patch -Patch6014: backport-Fix-compile-loop-in-32-bit-mode-for-characters-above.patch -Patch6015: backport-Fix-incorrect-matching-of-0xffffffff-to-any-characte.patch -Patch6016: backport-Fix-accept-and-endanchored-interaction-in-JIT.patch -Patch6017: backport-Fix-backreferences-with-unset-backref-and-non-greedy.patch -Patch6018: backport-Sanity-checks-for-ctype-functions-342.patch -Patch6019: backport-Fix-incorrect-class-character-matches-in-JIT.patch -Patch6020: backport-Fixing-an-issue-using-empty-character-sets-in-jit.patch -Patch6021: backport-pcre2grep-document-better-possible-multiline-matchin.patch -Patch6022: backport-Remove-incorrect-optimization-in-DFA-matching-when-p.patch -Patch6023: backport-Implement-PCRE2_EXTRA_CASELESS_RESTRICT-and-related-.patch -Patch6024: backport-Additional-PCRE2_EXTRA_ASCII_xxx-code.patch -Patch6025: backport-Fix-non-recognition-of-some-octal-escapes-in-substitute.patch -Patch6026: backport-Guard-against-out-of-bounds-memory-access-when-parsing.patch -Patch6027: backport-Add-Perl-titlecasing-475.patch -Patch6028: backport-Fix-incorrect-positive-error-code-from-pcre2_substitute.patch -Patch6029: backport-pcre2_compile-avoid-1-byte-buffer-overread-parsing-V.patch -Patch6030: backport-Improve-error-message-for-N-name-in-character-classes.patch -Patch6031: backport-Further-ASCII-tests-and-minor-bugfix-plus-ChangeLog-.patch -Patch6032: backport-avoid-inconsistency-between-d-and-digit-when-using-a.patch -Patch6033: backport-Fix-the-lookahead-after-d-or-posix-to-skip-whitespac.patch -Patch6034: backport-Improve-error-offsets-for-character-classes-548.patch -Patch6035: backport-Non-recursive-scan-prefix-in-JIT-560.patch -Patch6036: backport-Mend-a-bug-in-pcre2grep-that-caused-separator-lines-.patch -Patch6037: backport-Fix-oversight-in-adding-new-pcre2grep-test.patch -Patch6038: backport-minor-tweaks-to-valid_utf-to-avoid-truncation-of-err.patch BuildRequires: autoconf libtool automake coreutils gcc make readline-devel Obsoletes: pcre2-utf16 pcre2-utf32 pcre2-tools @@ -141,7 +102,7 @@ make check %defattr(-,root,root) %license %{_pkgdocdir}/{COPYING,LICENCE} %exclude %{_pkgdocdir}/ChangeLog -%doc AUTHORS NEWS HACKING README +%doc NEWS HACKING README AUTHORS.md LICENCE.md SECURITY.md %{_libdir}/libpcre2-8.so.* %{_libdir}/libpcre2-posix.so.* %{_libdir}/libpcre2-16.so.* @@ -164,6 +125,9 @@ make check %{_pkgdocdir}/html/ %changelog +* Fri Aug 29 2025 Yu Peng - 10.46-1 +- DESC:upgrade to 10.46 for fix CVE-2025-58050 + * Fri Aug 22 2025 yixiangzhike - 10.42-14 - DESC:sync patch from upstream to avoid truncation of error offset diff --git a/sljit-sv48-sv57.patch b/sljit-sv48-sv57.patch deleted file mode 100644 index 01d813e42841d0b5c685491f888c8b3182c8aabb..0000000000000000000000000000000000000000 --- a/sljit-sv48-sv57.patch +++ /dev/null @@ -1,313 +0,0 @@ -diff --git a/src/sljit/sljitNativeRISCV_32.c b/src/sljit/sljitNativeRISCV_32.c -index b38e692..2b744e7 100644 ---- a/src/sljit/sljitNativeRISCV_32.c -+++ b/src/sljit/sljitNativeRISCV_32.c -@@ -24,11 +24,8 @@ - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - --static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm, sljit_s32 tmp_r) -+static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm) - { -- SLJIT_UNUSED_ARG(tmp_r); -- SLJIT_ASSERT(dst_r != tmp_r); -- - if (imm <= SIMM_MAX && imm >= SIMM_MIN) - return push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm)); - -diff --git a/src/sljit/sljitNativeRISCV_64.c b/src/sljit/sljitNativeRISCV_64.c -index 32cec78..061cda9 100644 ---- a/src/sljit/sljitNativeRISCV_64.c -+++ b/src/sljit/sljitNativeRISCV_64.c -@@ -24,106 +24,56 @@ - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - --static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm, sljit_s32 tmp_r) -+static int trailing_zeros_64(sljit_uw x) - { -- sljit_sw high; -- -- SLJIT_ASSERT(dst_r != tmp_r); -- -- if (imm <= SIMM_MAX && imm >= SIMM_MIN) -- return push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm)); -- -- if (imm <= 0x7fffffffl && imm >= S32_MIN) { -- if (imm > S32_MAX) { -- SLJIT_ASSERT((imm & 0x800) != 0); -- FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u)); -- return push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)); -- } -- -- if ((imm & 0x800) != 0) -- imm += 0x1000; -- -- FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff))); -- -- if ((imm & 0xfff) == 0) -- return SLJIT_SUCCESS; -- -- return push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)); -+ /* See http://supertech.csail.mit.edu/papers/debruijn.pdf */ -+ static const sljit_u8 debruijn64tab[64] = { -+ 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4, -+ 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5, -+ 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11, -+ 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6, -+ }; -+ -+ static const sljit_uw debruijn64 = 0x03f79d71b4ca8b09ULL; -+ if (x == 0) { -+ return 64; - } -+ return (int)debruijn64tab[(x & -x) * debruijn64 >> (64 - 6)]; -+} -+static sljit_s32 load_immediate_32(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_s32 imm) -+{ -+ /* Add 0x800 to cancel out the signed extension of ADDIW. */ -+ sljit_s32 hi20 = (imm + 0x800) >> 12 & 0xfffff; -+ sljit_s32 lo12 = imm & 0xfff; -+ sljit_s32 src_r = 0; - -- /* Trailing zeroes could be used to produce shifted immediates. */ -- -- if (imm <= 0x7ffffffffffl && imm >= -0x80000000000l) { -- high = imm >> 12; -- -- if (imm & 0x800) -- high = ~high; -- -- if (high > S32_MAX) { -- SLJIT_ASSERT((high & 0x800) != 0); -- FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u)); -- FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(high))); -- } else { -- if ((high & 0x800) != 0) -- high += 0x1000; -- -- FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(high & ~0xfff))); -- -- if ((high & 0xfff) != 0) -- FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(high))); -- } -- -- FAIL_IF(push_inst(compiler, SLLI | RD(dst_r) | RS1(dst_r) | IMM_I(12))); -- -- if ((imm & 0xfff) != 0) -- return push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)); -- -- return SLJIT_SUCCESS; -+ if (hi20 != 0) { -+ FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(hi20 << 12))); - } -- -- high = imm >> 32; -- imm = (sljit_s32)imm; -- -- if ((imm & 0x80000000l) != 0) -- high = ~high; -- -- if (high <= 0x7ffff && high >= -0x80000) { -- FAIL_IF(push_inst(compiler, LUI | RD(tmp_r) | (sljit_ins)(high << 12))); -- high = 0x1000; -- } else { -- if ((high & 0x800) != 0) -- high += 0x1000; -- -- FAIL_IF(push_inst(compiler, LUI | RD(tmp_r) | (sljit_ins)(high & ~0xfff))); -- high &= 0xfff; -+ if (lo12 != 0 || hi20 == 0) { -+ src_r = hi20 != 0 ? dst_r : 0; -+ FAIL_IF(push_inst(compiler, ADDIW | RD(dst_r) | RS1(src_r) | IMM_I(lo12))); - } -- -- if (imm <= SIMM_MAX && imm >= SIMM_MIN) { -- FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm))); -- imm = 0; -- } else if (imm > S32_MAX) { -- SLJIT_ASSERT((imm & 0x800) != 0); -- -- FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u)); -- imm = 0x1000 | (imm & 0xfff); -- } else { -- if ((imm & 0x800) != 0) -- imm += 0x1000; -- -- FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff))); -- imm &= 0xfff; -+ return SLJIT_SUCCESS; -+} -+static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm) -+{ -+ sljit_sw lo12, hi52; -+ sljit_s32 shift; -+ if (((imm << 32) >> 32) == imm) { -+ return load_immediate_32(compiler, dst_r, (sljit_s32)imm); - } -- -- if ((high & 0xfff) != 0) -- FAIL_IF(push_inst(compiler, ADDI | RD(tmp_r) | RS1(tmp_r) | IMM_I(high))); -- -- if (imm & 0x1000) -- FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm))); -- else if (imm != 0) -- FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm))); -- -- FAIL_IF(push_inst(compiler, SLLI | RD(tmp_r) | RS1(tmp_r) | IMM_I((high & 0x1000) ? 20 : 32))); -- return push_inst(compiler, XOR | RD(dst_r) | RS1(dst_r) | RS2(tmp_r)); -+ lo12 = (imm << 52) >> 52; -+ /* Add 0x800 to cancel out the signed extension of ADDI. */ -+ hi52 = (imm + 0x800) >> 12; -+ shift = 12 + trailing_zeros_64((sljit_uw)hi52); -+ hi52 = ((hi52 >> (shift - 12)) << shift) >> shift; -+ load_immediate(compiler, dst_r, hi52); -+ FAIL_IF(push_inst(compiler, SLLI | RD(dst_r) | RS1(dst_r) | IMM_I(shift))); -+ if (lo12) { -+ FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(lo12))); -+ } -+ return SLJIT_SUCCESS; - } - - static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins) -diff --git a/src/sljit/sljitNativeRISCV_common.c b/src/sljit/sljitNativeRISCV_common.c -index 58a48c6..5bf5b3e 100644 ---- a/src/sljit/sljitNativeRISCV_common.c -+++ b/src/sljit/sljitNativeRISCV_common.c -@@ -79,6 +79,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { - - #define ADD (F7(0x0) | F3(0x0) | OPC(0x33)) - #define ADDI (F3(0x0) | OPC(0x13)) -+#define ADDIW (F3(0x0) | OPC(0x1b)) - #define AND (F7(0x0) | F3(0x7) | OPC(0x33)) - #define ANDI (F3(0x7) | OPC(0x13)) - #define AUIPC (OPC(0x17)) -@@ -628,7 +629,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi - local_size -= STACK_MAX_DISTANCE; - - if (local_size > STACK_MAX_DISTANCE) -- FAIL_IF(load_immediate(compiler, TMP_REG1, local_size, TMP_REG3)); -+ FAIL_IF(load_immediate(compiler, TMP_REG1, local_size)); - offset = STACK_MAX_DISTANCE - SSIZE_OF(sw); - } - -@@ -725,7 +726,7 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit - local_size -= STACK_MAX_DISTANCE; - - if (local_size > STACK_MAX_DISTANCE) { -- FAIL_IF(load_immediate(compiler, TMP_REG2, local_size, TMP_REG3)); -+ FAIL_IF(load_immediate(compiler, TMP_REG2, local_size)); - FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RS1(SLJIT_SP) | RS2(TMP_REG2))); - } else - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(local_size))); -@@ -966,11 +967,11 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl - argw_hi = TO_ARGW_HI(argw); - - if (next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN && argw_hi != TO_ARGW_HI(next_argw)) { -- FAIL_IF(load_immediate(compiler, TMP_REG3, argw, tmp_r)); -+ FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); - compiler->cache_argw = argw; - offset = 0; - } else { -- FAIL_IF(load_immediate(compiler, TMP_REG3, argw_hi, tmp_r)); -+ FAIL_IF(load_immediate(compiler, TMP_REG3, argw_hi)); - compiler->cache_argw = argw_hi; - offset = argw & 0xfff; - argw = argw_hi; -@@ -1013,7 +1014,7 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, s - - argw = 0; - } else { -- FAIL_IF(load_immediate(compiler, tmp_r, TO_ARGW_HI(argw), TMP_REG3)); -+ FAIL_IF(load_immediate(compiler, tmp_r, TO_ARGW_HI(argw))); - - if (base != 0) - FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(tmp_r) | RS2(base))); -@@ -1534,9 +1535,10 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 - compiler->cache_argw = 0; - } - -- if (dst == TMP_REG2) { -+ if (dst == 0) { - SLJIT_ASSERT(HAS_FLAGS(op)); - flags |= UNUSED_DEST; -+ dst = TMP_REG2; - } - else if (FAST_IS_REG(dst)) { - dst_r = dst; -@@ -1571,7 +1573,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 - } - else if (src1 & SLJIT_IMM) { - if (src1w) { -- FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3)); -+ FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); - src1_r = TMP_REG1; - } - else -@@ -1595,7 +1597,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 - else if (src2 & SLJIT_IMM) { - if (!(flags & SRC2_IMM)) { - if (src2w) { -- FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w, TMP_REG3)); -+ FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); - src2_r = sugg_src2_r; - } - else { -@@ -1827,7 +1829,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil - CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); - - SLJIT_SKIP_CHECKS(compiler); -- return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); -+ return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w); - } - - SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, -@@ -1875,7 +1877,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * - FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG1, src1, src1w)); - src1 = TMP_REG1; - } else if (src1 & SLJIT_IMM) { -- FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3)); -+ FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); - src1 = TMP_REG1; - } - -@@ -2032,7 +2034,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp - srcw = (sljit_s32)srcw; - #endif - -- FAIL_IF(load_immediate(compiler, TMP_REG1, srcw, TMP_REG3)); -+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); - src = TMP_REG1; - } - -@@ -2422,7 +2424,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler - - if (src1 & SLJIT_IMM) { - if (src1w != 0) { -- PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3)); -+ PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); - src1 = TMP_REG1; - } - else -@@ -2431,7 +2433,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler - - if (src2 & SLJIT_IMM) { - if (src2w != 0) { -- PTR_FAIL_IF(load_immediate(compiler, TMP_REG2, src2w, TMP_REG3)); -+ PTR_FAIL_IF(load_immediate(compiler, TMP_REG2, src2w)); - src2 = TMP_REG2; - } - else -@@ -2676,10 +2678,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile - memw = 0; - } else if (memw > SIMM_MAX - SSIZE_OF(sw) || memw < SIMM_MIN) { - if (((memw + 0x800) & 0xfff) <= 0xfff - SSIZE_OF(sw)) { -- FAIL_IF(load_immediate(compiler, TMP_REG1, TO_ARGW_HI(memw), TMP_REG3)); -+ FAIL_IF(load_immediate(compiler, TMP_REG1, TO_ARGW_HI(memw))); - memw &= 0xfff; - } else { -- FAIL_IF(load_immediate(compiler, TMP_REG1, memw, TMP_REG3)); -+ FAIL_IF(load_immediate(compiler, TMP_REG1, memw)); - memw = 0; - } -