diff --git a/backport-Fix-32-bit-quantifier-following-a-character-larger-t.patch b/backport-Fix-32-bit-quantifier-following-a-character-larger-t.patch new file mode 100644 index 0000000000000000000000000000000000000000..6cb26966260051fbb6743b47c865af1fec1ac40f --- /dev/null +++ b/backport-Fix-32-bit-quantifier-following-a-character-larger-t.patch @@ -0,0 +1,119 @@ +From c1306126c3f12c16ad62dd2553132f64a28ca607 Mon Sep 17 00:00:00 2001 +From: Philip Hazel +Date: Sun, 19 Nov 2023 17:18:07 +0000 +Subject: [PATCH] Fix 32-bit quantifier following a character larger than the + maximum UTF character. + +Conflict:don't modify ChangeLog; adapt context +Reference:https://github.com/PCRE2Project/pcre2/commit/c1306126c3f12c16ad62dd2553132f64a28ca607 + +--- + src/pcre2_compile.c | 11 ++++++++--- + testdata/testinput12 | 6 ++++++ + testdata/testoutput12-16 | 7 +++++++ + testdata/testoutput12-32 | 7 +++++++ + 4 files changed, 28 insertions(+), 3 deletions(-) + +diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c +index b3e4969..fdaf2ad 100644 +--- a/src/pcre2_compile.c ++++ b/src/pcre2_compile.c +@@ -2781,6 +2781,7 @@ uint32_t *verbstartptr = NULL; + uint32_t *previous_callout = NULL; + uint32_t *parsed_pattern = cb->parsed_pattern; + uint32_t *parsed_pattern_end = cb->parsed_pattern_end; ++uint32_t *this_parsed_item = NULL; + uint32_t meta_quantifier = 0; + uint32_t add_after_mark = 0; + uint32_t extra_options = cb->cx->extra_options; +@@ -2866,10 +2867,11 @@ while (ptr < ptrend) + uint32_t set, unset, *optset; + uint32_t terminator; + uint32_t prev_meta_quantifier; ++ uint32_t *prev_parsed_item = this_parsed_item; + BOOL prev_okquantifier; + PCRE2_SPTR tempptr; + PCRE2_SIZE offset; +- ++ + if (parsed_pattern >= parsed_pattern_end) + { + errorcode = ERR63; /* Internal error (parsed pattern overflow) */ +@@ -2881,6 +2883,10 @@ while (ptr < ptrend) + errorcode = ERR19; + goto FAILED; /* Parentheses too deeply nested */ + } ++ ++ /* Remember where this item started */ ++ ++ 
this_parsed_item = parsed_pattern; + + /* Get next input character, save its position for callout handling. */ + +@@ -3173,7 +3179,6 @@ while (ptr < ptrend) + continue; /* Next character in pattern */ + } + +- + /* Process the next item in the main part of a pattern. */ + + switch(c) +@@ -3450,7 +3455,7 @@ while (ptr < ptrend) + wrapping it in non-capturing brackets, but we have to allow for a preceding + (*MARK) for when (*ACCEPT) has an argument. */ + +- if (parsed_pattern[-1] == META_ACCEPT) ++ if (*prev_parsed_item == META_ACCEPT) + { + uint32_t *p; + for (p = parsed_pattern - 1; p >= verbstartptr; p--) p[1] = p[0]; +diff --git a/testdata/testinput12 b/testdata/testinput12 +index 7a85eb5..1e552e6 100644 +--- a/testdata/testinput12 ++++ b/testdata/testinput12 +@@ -560,4 +560,10 @@ + + # ---------------------------------------------------- + ++# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This ++# fails in 16-bit mode, but is OK for 32-bit. ++ ++/\x{802a0000}*/ ++ \x{802a0000}\x{802a0000} ++ + # End of testinput12 +diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 +index 9867632..8cbc13d 100644 +--- a/testdata/testoutput12-16 ++++ b/testdata/testoutput12-16 +@@ -1803,4 +1803,11 @@ No match + + # ---------------------------------------------------- + ++# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This ++# fails in 16-bit mode, but is OK for 32-bit. ++ ++/\x{802a0000}*/ ++Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large ++ \x{802a0000}\x{802a0000} ++ + # End of testinput12 +diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 +index 3a20dd4..1a98b4b 100644 +--- a/testdata/testoutput12-32 ++++ b/testdata/testoutput12-32 +@@ -1801,4 +1801,11 @@ No match + + # ---------------------------------------------------- + ++# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This ++# fails in 16-bit mode, but is OK for 32-bit. 
++ ++/\x{802a0000}*/ ++ \x{802a0000}\x{802a0000} ++ 0: \x{802a0000}\x{802a0000} ++ + # End of testinput12 +-- +2.23.0 + diff --git a/backport-Fix-X-matching-in-32-bit-mode-without-UTF-in-JIT.patch b/backport-Fix-X-matching-in-32-bit-mode-without-UTF-in-JIT.patch new file mode 100644 index 0000000000000000000000000000000000000000..856353126dd40ac449e2bb5642eca7b1ed93f456 --- /dev/null +++ b/backport-Fix-X-matching-in-32-bit-mode-without-UTF-in-JIT.patch @@ -0,0 +1,94 @@ +From 45dcb3de900b77583f4e9daa663004c55fad4794 Mon Sep 17 00:00:00 2001 +From: Zoltan Herczeg +Date: Wed, 22 Nov 2023 10:22:59 +0000 +Subject: [PATCH] Fix \X matching in 32 bit mode without UTF in JIT + +Conflict:don't modify ChangeLog; adapt context +Reference:https://github.com/PCRE2Project/pcre2/commit/45dcb3de900b77583f4e9daa663004c55fad4794 + +--- + src/pcre2_jit_compile.c | 6 +++--- + testdata/testinput12 | 4 ++++ + testdata/testoutput12-16 | 9 +++++++++ + testdata/testoutput12-32 | 5 +++++ + 4 files changed, 21 insertions(+), 3 deletions(-) + +diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c +index 510c392..8d64e1c 100644 +--- a/src/pcre2_jit_compile.c ++++ b/src/pcre2_jit_compile.c +@@ -8718,7 +8718,7 @@ c = *cc++; + + #if PCRE2_CODE_UNIT_WIDTH == 32 + if (c >= 0x110000) +- return NULL; ++ return cc; + #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ + lgb = UCD_GRAPHBREAK(c); + +@@ -8958,7 +8958,7 @@ switch(type) + #else + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, + common->invalid_utf ? 
SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_no_utf)); +- if (!common->utf || common->invalid_utf) ++ if (common->invalid_utf) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); + #endif + +@@ -12044,7 +12044,7 @@ switch(opcode) + } + + #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +- if (common->utf) ++ if (type == OP_EXTUNI || common->utf) + { + OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); + detect_partial_match(common, &no_match); +diff --git a/testdata/testinput12 b/testdata/testinput12 +index 5a2d8d2..a6678bb 100644 +--- a/testdata/testinput12 ++++ b/testdata/testinput12 +@@ -569,4 +569,8 @@ + /\x{802a0000}*/ + \x{802a0000}\x{802a0000} + ++# UTF matching without UTF, check invalid UTF characters ++/\X++/ ++ a\x{110000}\x{ffffffff} ++ + # End of testinput12 +diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 +index 9ac403e..f3b40a3 100644 +--- a/testdata/testoutput12-16 ++++ b/testdata/testoutput12-16 +@@ -1814,4 +1814,13 @@ No match + Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large + \x{802a0000}\x{802a0000} + ++# UTF matching without UTF, check invalid UTF characters ++/\X++/ ++ a\x{110000}\x{ffffffff} ++** Character \x{110000} is greater than 0xffff and UTF-16 mode is not enabled. ++** Truncation will probably give the wrong result. ++** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. ++** Truncation will probably give the wrong result. 
++ 0: a\x00\x{ffff} ++ + # End of testinput12 +diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 +index 9396305..dd42f86 100644 +--- a/testdata/testoutput12-32 ++++ b/testdata/testoutput12-32 +@@ -1812,4 +1812,9 @@ No match + \x{802a0000}\x{802a0000} + 0: \x{802a0000}\x{802a0000} + ++# UTF matching without UTF, check invalid UTF characters ++/\X++/ ++ a\x{110000}\x{ffffffff} ++ 0: a\x{110000}\x{ffffffff} ++ + # End of testinput12 +-- +2.23.0 + diff --git a/backport-Fix-accept-and-endanchored-interaction-in-JIT.patch b/backport-Fix-accept-and-endanchored-interaction-in-JIT.patch new file mode 100644 index 0000000000000000000000000000000000000000..f8091b3d8bf54602d378fc16cb6fce360f3e6206 --- /dev/null +++ b/backport-Fix-accept-and-endanchored-interaction-in-JIT.patch @@ -0,0 +1,78 @@ +From 1c09efe6b0008a3b463299efe7501bc3140806f3 Mon Sep 17 00:00:00 2001 +From: Zoltan Herczeg +Date: Wed, 6 Dec 2023 10:06:50 +0000 +Subject: [PATCH] Fix accept and endanchored interaction in JIT + +Conflict:don't modify ChangeLog +Reference:https://github.com/PCRE2Project/pcre2/commit/1c09efe6b0008a3b463299efe7501bc3140806f3 + +--- + src/pcre2_jit_compile.c | 15 ++++++++++++--- + src/pcre2_jit_test.c | 1 + + 2 files changed, 13 insertions(+), 3 deletions(-) + +diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c +index 2e11c3c..849e2c8 100644 +--- a/src/pcre2_jit_compile.c ++++ b/src/pcre2_jit_compile.c +@@ -489,6 +489,8 @@ typedef struct compiler_common { + jump_list *casefulcmp; + jump_list *caselesscmp; + jump_list *reset_match; ++ /* Same as reset_match, but resets the STR_PTR as well. 
*/ ++ jump_list *restart_match; + BOOL unset_backref; + BOOL alt_circumflex; + #ifdef SUPPORT_UNICODE +@@ -3146,7 +3148,7 @@ return (value & (value - 1)) == 0; + + static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label) + { +-while (list) ++while (list != NULL) + { + /* sljit_set_label is clever enough to do nothing + if either the jump or the label is NULL. */ +@@ -12187,7 +12189,7 @@ if (*cc == OP_FAIL) + } + + if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0) +- add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); ++ add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); + + if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty) + { +@@ -14552,10 +14554,17 @@ if (common->caselesscmp != NULL) + set_jumps(common->caselesscmp, LABEL()); + do_caselesscmp(common); + } +-if (common->reset_match != NULL) ++if (common->reset_match != NULL || common->restart_match != NULL) + { ++ if (common->restart_match != NULL) ++ { ++ set_jumps(common->restart_match, LABEL()); ++ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); ++ } ++ + set_jumps(common->reset_match, LABEL()); + do_reset_match(common, (re->top_bracket + 1) * 2); ++ /* The value of restart_match is in TMP1. 
*/ + CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label); + OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0); + JUMPTO(SLJIT_JUMP, reset_match_label); +diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c +index b5d95d5..0974d19 100644 +--- a/src/pcre2_jit_test.c ++++ b/src/pcre2_jit_test.c +@@ -655,6 +655,7 @@ static struct regression_test_case regression_test_cases[] = { + { MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" }, + { MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" }, + { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" }, ++ { MU | PCRE2_ENDANCHORED, A, 0, 0, "aa(*ACCEPT)aa", "aaa" }, + + /* Conditional blocks. */ + { MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" }, +-- +2.23.0 + diff --git a/backport-Fix-another-oversight-in-c1306126.patch b/backport-Fix-another-oversight-in-c1306126.patch new file mode 100644 index 0000000000000000000000000000000000000000..2f9c3c4f02333ff6b24dd594db4b38eff9888632 --- /dev/null +++ b/backport-Fix-another-oversight-in-c1306126.patch @@ -0,0 +1,31 @@ +From 04f6668a09c51cf10fa5514019843ab0af9724c8 Mon Sep 17 00:00:00 2001 +From: Philip Hazel +Date: Tue, 21 Nov 2023 15:10:34 +0000 +Subject: [PATCH] Fix another oversight in c1306126 + +Conflict:NA +Reference:https://github.com/PCRE2Project/pcre2/commit/04f6668a09c51cf10fa5514019843ab0af9724c8 + +--- + src/pcre2_compile.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c +index 9e45580..7b522c5 100644 +--- a/src/pcre2_compile.c ++++ b/src/pcre2_compile.c +@@ -3108,8 +3108,11 @@ while (ptr < ptrend) + !read_repeat_counts(&tempptr, ptrend, NULL, NULL, &errorcode)))) + { + if (after_manual_callout-- <= 0) ++ { + parsed_pattern = manage_callouts(thisptr, &previous_callout, auto_callout, + parsed_pattern, cb); ++ this_parsed_item = parsed_pattern; /* New start for current item */ ++ } + } + + /* If expect_cond_assert is 2, we have just passed (?( and are expecting an +-- +2.23.0 + diff --git 
a/backport-Fix-backref-iterators-when-PCRE2_MATCH_UNSET_BACKREF.patch b/backport-Fix-backref-iterators-when-PCRE2_MATCH_UNSET_BACKREF.patch new file mode 100644 index 0000000000000000000000000000000000000000..085374952e83d5cb3474c61ffcbdaf9d086d3f4b --- /dev/null +++ b/backport-Fix-backref-iterators-when-PCRE2_MATCH_UNSET_BACKREF.patch @@ -0,0 +1,46 @@ +From 936fef2a4480b21f5c43b207181097736fb311e3 Mon Sep 17 00:00:00 2001 +From: Zoltan Herczeg +Date: Wed, 22 Nov 2023 11:50:38 +0000 +Subject: [PATCH] Fix backref iterators when PCRE2_MATCH_UNSET_BACKREF is set + in JIT + +Conflict:don't modify ChangeLog +Reference:https://github.com/PCRE2Project/pcre2/commit/936fef2a4480b21f5c43b207181097736fb311e3 + +--- + src/pcre2_jit_compile.c | 4 +++- + src/pcre2_jit_test.c | 1 + + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c +index 8d64e1c..8110d8c 100644 +--- a/src/pcre2_jit_compile.c ++++ b/src/pcre2_jit_compile.c +@@ -9539,9 +9539,11 @@ if (!minimize) + if (ref) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); ++ + if (ref) + { +- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); ++ if (!common->unset_backref) ++ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + } + else +diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c +index b27cec7..8bff3dc 100644 +--- a/src/pcre2_jit_test.c ++++ b/src/pcre2_jit_test.c +@@ -595,6 +595,7 @@ static struct regression_test_case regression_test_cases[] = { + { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?AA)|(?BB))\\k{1,3}M", "aaaaaaaabbbbaabbbbm" }, + { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?AA)|(?BB))\\k{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" }, + { CMU | PCRE2_DUPNAMES, A, 0, 0, 
"(?:(?AA)|(?BB))\\k{2,3}?", "aaaabbbbaaaabbbbbbbbbb" }, ++ { MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" }, + + /* Assertions. */ + { MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" }, +-- +2.23.0 + diff --git a/backport-Fix-backreferences-with-unset-backref-and-non-greedy.patch b/backport-Fix-backreferences-with-unset-backref-and-non-greedy.patch new file mode 100644 index 0000000000000000000000000000000000000000..801b443d96f96aafbe5a9de5b80c1558a8fcff63 --- /dev/null +++ b/backport-Fix-backreferences-with-unset-backref-and-non-greedy.patch @@ -0,0 +1,44 @@ +From 9de4d53cf850e0fca625ce9d80c12bea5b2a5ab9 Mon Sep 17 00:00:00 2001 +From: Zoltan Herczeg +Date: Thu, 7 Dec 2023 09:03:24 +0000 +Subject: [PATCH] Fix backreferences with unset backref and non-greedy + iterators in JIT + +Conflict:don't modify ChangeLog; modify topbacktracks instead of +own_backtracks because c3529d0227e is not merged +Reference:https://github.com/PCRE2Project/pcre2/commit/9de4d53cf850e0fca625ce9d80c12bea5b2a5ab9 + +--- + src/pcre2_jit_compile.c | 3 ++- + src/pcre2_jit_test.c | 1 + + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c +index 0f445e1..e1daa1e 100644 +--- a/src/pcre2_jit_compile.c ++++ b/src/pcre2_jit_compile.c +@@ -9653,7 +9653,8 @@ else + { + if (ref) + { +- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); ++ if (!common->unset_backref) ++ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + } + else +diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c +index 0974d19..9b63c8e 100644 +--- a/src/pcre2_jit_test.c ++++ b/src/pcre2_jit_test.c +@@ -596,6 +596,7 @@ static struct regression_test_case regression_test_cases[] = { + { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?AA)|(?BB))\\k{0,3}?M", 
"aaaaaabbbbbbaabbbbbbbbbbm" }, + { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?AA)|(?BB))\\k{2,3}?", "aaaabbbbaaaabbbbbbbbbb" }, + { MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" }, ++ { MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\1+?()", "" }, + + /* Assertions. */ + { MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" }, +-- +2.23.0 + diff --git a/backport-Fix-bad-patch-in-05206d66.-The-interpreter-was-handl.patch b/backport-Fix-bad-patch-in-05206d66.-The-interpreter-was-handl.patch new file mode 100644 index 0000000000000000000000000000000000000000..d7d94b32d6222fac1070f5ac1c051660bfeda6d0 --- /dev/null +++ b/backport-Fix-bad-patch-in-05206d66.-The-interpreter-was-handl.patch @@ -0,0 +1,90 @@ +From 57ee073252dc826dbe412846a83421d2bb4483bc Mon Sep 17 00:00:00 2001 +From: Philip Hazel +Date: Wed, 22 Nov 2023 11:34:27 +0000 +Subject: [PATCH] Fix bad patch in 05206d66. The interpreter was handling + NOTEOL incorrectly in general after trying to fix it in invalid UTF subjects. + +Conflict:adapt context +Reference:https://github.com/PCRE2Project/pcre2/commit/57ee073252dc826dbe412846a83421d2bb4483bc + +--- + src/pcre2_intmodedep.h | 3 ++- + src/pcre2_match.c | 7 +++---- + testdata/testinput2 | 4 ++++ + testdata/testoutput2 | 6 ++++++ + 4 files changed, 15 insertions(+), 5 deletions(-) + +diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h +index 5e7e10d..423764d 100644 +--- a/src/pcre2_intmodedep.h ++++ b/src/pcre2_intmodedep.h +@@ -880,7 +880,8 @@ typedef struct match_block { + PCRE2_SPTR start_code; /* For use when recursing */ + PCRE2_SPTR start_subject; /* Start of the subject string */ + PCRE2_SPTR check_subject; /* Where UTF-checked from */ +- PCRE2_SPTR end_subject; /* End of the subject string */ ++ PCRE2_SPTR end_subject; /* Usable end of the subject string */ ++ PCRE2_SPTR true_end_subject; /* Actual end of the subject string */ + PCRE2_SPTR end_match_ptr; /* Subject position at end match */ + PCRE2_SPTR start_used_ptr; /* Earliest consulted 
character */ + PCRE2_SPTR last_used_ptr; /* Latest consulted character */ +diff --git a/src/pcre2_match.c b/src/pcre2_match.c +index ea03976..c5e84ce 100644 +--- a/src/pcre2_match.c ++++ b/src/pcre2_match.c +@@ -6076,12 +6076,10 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, + if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS; + + /* Fall through */ +- /* Unconditional end of subject assertion (\z). We must check NOTEOL +- because it gets set for invalid UTF fragments. */ ++ /* Unconditional end of subject assertion (\z). */ + + case OP_EOD: +- if (Feptr < mb->end_subject || (mb->moptions & PCRE2_NOTEOL) != 0) +- RRETURN(MATCH_NOMATCH); ++ if (Feptr < mb->true_end_subject) RRETURN(MATCH_NOMATCH); + if (mb->partial != 0) + { + mb->hitend = TRUE; +@@ -6891,6 +6889,7 @@ mb->callout_data = mcontext->callout_data; + mb->start_subject = subject; + mb->start_offset = start_offset; + mb->end_subject = end_subject; ++mb->true_end_subject = true_end_subject; + mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0; + mb->allowemptypartial = (re->max_lookbehind > 0) || + (re->flags & PCRE2_MATCH_EMPTY) != 0; +diff --git a/testdata/testinput2 b/testdata/testinput2 +index 0e24e78..b874f20 100644 +--- a/testdata/testinput2 ++++ b/testdata/testinput2 +@@ -6055,4 +6055,8 @@ a)"xI + + /A +/extended + ++/a\z/ ++ a ++ a\=noteol ++ + # End of testinput2 +diff --git a/testdata/testoutput2 b/testdata/testoutput2 +index 68800fb..c1bc0e6 100644 +--- a/testdata/testoutput2 ++++ b/testdata/testoutput2 +@@ -17946,6 +17946,12 @@ No match + + /A +/extended + ++/a\z/ ++ a ++ 0: a ++ a\=noteol ++ 0: a ++ + # End of testinput2 + Error -70: PCRE2_ERROR_BADDATA (unknown error number) + Error -62: bad serialized data +-- +2.23.0 + diff --git a/backport-Fix-compile-loop-in-32-bit-mode-for-characters-above.patch b/backport-Fix-compile-loop-in-32-bit-mode-for-characters-above.patch new file mode 100644 index 
0000000000000000000000000000000000000000..f2ee3dabe958bb7c7f66cb0789cac361d20788ac --- /dev/null +++ b/backport-Fix-compile-loop-in-32-bit-mode-for-characters-above.patch @@ -0,0 +1,80 @@ +From afce00e484cff118a824dac498e8044680dac401 Mon Sep 17 00:00:00 2001 +From: Philip Hazel +Date: Fri, 1 Dec 2023 16:49:59 +0000 +Subject: [PATCH] Fix compile loop in 32-bit mode for characters above the + Unicode limit when caseless and ucp are set. + +Conflict:don't modify ChangeLog; adapt context +Reference:https://github.com/PCRE2Project/pcre2/commit/afce00e484cff118a824dac498e8044680dac401 + +--- + src/pcre2_compile.c | 6 +++++- + testdata/testinput12 | 4 ++++ + testdata/testoutput12-16 | 5 +++++ + testdata/testoutput12-32 | 5 +++++ + 4 files changed, 19 insertions(+), 1 deletion(-) + +diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c +index 7b522c5..1935e76 100644 +--- a/src/pcre2_compile.c ++++ b/src/pcre2_compile.c +@@ -5155,10 +5155,14 @@ unsigned int co; + unsigned int co; + + /* Find the first character that has an other case. If it has multiple other +-cases, return its case offset value. */ ++cases, return its case offset value. In 32-bit mode, a value ++greater than the Unicode maximum ends the range. */ + + for (c = *cptr; c <= d; c++) + { ++#if PCRE2_CODE_UNIT_WIDTH == 32 ++ if (c > MAX_UTF_CODE_POINT) return -1; ++#endif + if ((co = UCD_CASESET(c)) != 0) + { + *ocptr = c++; /* Character that has the set */ +diff --git a/testdata/testinput12 b/testdata/testinput12 +index a6678bb..de3d406 100644 +--- a/testdata/testinput12 ++++ b/testdata/testinput12 +@@ -573,4 +573,8 @@ + /\X++/ + a\x{110000}\x{ffffffff} + ++# This used to loop in 32-bit mode; it will fail in 16-bit mode. 
++/[\x{ffffffff}]/caseless,ucp ++ \x{ffffffff}xyz ++ + # End of testinput12 +diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 +index f3b40a3..9fa93fa 100644 +--- a/testdata/testoutput12-16 ++++ b/testdata/testoutput12-16 +@@ -1823,4 +1823,9 @@ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is to + ** Truncation will probably give the wrong result. + 0: a\x00\x{ffff} + ++# This used to loop in 32-bit mode; it will fail in 16-bit mode. ++/[\x{ffffffff}]/caseless,ucp ++Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large ++ \x{ffffffff}xyz ++ + # End of testinput12 +diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 +index dd42f86..721d8bc 100644 +--- a/testdata/testoutput12-32 ++++ b/testdata/testoutput12-32 +@@ -1817,4 +1817,9 @@ No match + a\x{110000}\x{ffffffff} + 0: a\x{110000}\x{ffffffff} + ++# This used to loop in 32-bit mode; it will fail in 16-bit mode. ++/[\x{ffffffff}]/caseless,ucp ++ \x{ffffffff}xyz ++ 0: \x{ffffffff} ++ + # End of testinput12 +-- +2.23.0 + diff --git a/backport-Fix-incorrect-matching-of-0xffffffff-to-any-characte.patch b/backport-Fix-incorrect-matching-of-0xffffffff-to-any-characte.patch new file mode 100644 index 0000000000000000000000000000000000000000..03bd0e5cf707f6ff3216d1f7f4cc3f3cf743146f --- /dev/null +++ b/backport-Fix-incorrect-matching-of-0xffffffff-to-any-characte.patch @@ -0,0 +1,461 @@ +From ad73148dfb6d06280a4d87f322991762aff90a55 Mon Sep 17 00:00:00 2001 +From: Philip Hazel +Date: Mon, 4 Dec 2023 16:11:41 +0000 +Subject: [PATCH] Fix incorrect matching of 0xffffffff to any character with + more than one other case in 32-bit UCP (but not UTF) mode. 
+ +Conflict:don't modify ChangeLog; use "Lctype == OP_NOTPROP" instead of +"notmatch" because 92d7cf1dd04 is not merged +Reference:https://github.com/PCRE2Project/pcre2/commit/ad73148dfb6d06280a4d87f322991762aff90a55 + +--- + src/pcre2_dfa_match.c | 28 ++++++++++++++++++++++++++ + src/pcre2_match.c | 43 ++++++++++++++++++++++++++++++++++------ + testdata/testinput12 | 26 ++++++++++++++++++++++++ + testdata/testinput14 | 27 +++++++++++++++++++++++++ + testdata/testoutput12-16 | 37 ++++++++++++++++++++++++++++++++++ + testdata/testoutput12-32 | 33 ++++++++++++++++++++++++++++++ + testdata/testoutput14-16 | 38 +++++++++++++++++++++++++++++++++++ + testdata/testoutput14-32 | 34 +++++++++++++++++++++++++++++++ + testdata/testoutput14-8 | 38 +++++++++++++++++++++++++++++++++++ + 9 files changed, 298 insertions(+), 6 deletions(-) + +diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c +index 1c48ad6..caae652 100644 +--- a/src/pcre2_dfa_match.c ++++ b/src/pcre2_dfa_match.c +@@ -1241,6 +1241,13 @@ for (;;) + break; + + case PT_CLIST: ++#if PCRE2_CODE_UNIT_WIDTH == 32 ++ if (c > MAX_UTF_CODE_POINT) ++ { ++ OK = FALSE; ++ break; ++ } ++#endif + cp = PRIV(ucd_caseless_sets) + code[2]; + for (;;) + { +@@ -1516,6 +1523,13 @@ for (;;) + break; + + case PT_CLIST: ++#if PCRE2_CODE_UNIT_WIDTH == 32 ++ if (c > MAX_UTF_CODE_POINT) ++ { ++ OK = FALSE; ++ break; ++ } ++#endif + cp = PRIV(ucd_caseless_sets) + code[3]; + for (;;) + { +@@ -1774,6 +1788,13 @@ for (;;) + break; + + case PT_CLIST: ++#if PCRE2_CODE_UNIT_WIDTH == 32 ++ if (c > MAX_UTF_CODE_POINT) ++ { ++ OK = FALSE; ++ break; ++ } ++#endif + cp = PRIV(ucd_caseless_sets) + code[3]; + for (;;) + { +@@ -2058,6 +2079,13 @@ for (;;) + break; + + case PT_CLIST: ++#if PCRE2_CODE_UNIT_WIDTH == 32 ++ if (c > MAX_UTF_CODE_POINT) ++ { ++ OK = FALSE; ++ break; ++ } ++#endif + cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2]; + for (;;) + { +diff --git a/src/pcre2_match.c b/src/pcre2_match.c +index d162e70..b2e1f23 100644 +--- 
a/src/pcre2_match.c ++++ b/src/pcre2_match.c +@@ -2565,6 +2565,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, + break; + + case PT_CLIST: ++#if PCRE2_CODE_UNIT_WIDTH == 32 ++ if (fc > MAX_UTF_CODE_POINT) ++ { ++ if (Fop == OP_NOTPROP) break;; ++ RRETURN(MATCH_NOMATCH); ++ } ++#endif + cp = PRIV(ucd_caseless_sets) + Fecode[2]; + for (;;) + { +@@ -2885,6 +2892,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); ++#if PCRE2_CODE_UNIT_WIDTH == 32 ++ if (fc > MAX_UTF_CODE_POINT) ++ { ++ if (Lctype == OP_NOTPROP) continue; ++ RRETURN(MATCH_NOMATCH); ++ } ++#endif + cp = PRIV(ucd_caseless_sets) + Lpropvalue; + for (;;) + { +@@ -3698,6 +3712,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); ++#if PCRE2_CODE_UNIT_WIDTH == 32 ++ if (fc > MAX_UTF_CODE_POINT) ++ { ++ if (Lctype == OP_NOTPROP) continue; ++ RRETURN(MATCH_NOMATCH); ++ } ++#endif + cp = PRIV(ucd_caseless_sets) + Lpropvalue; + for (;;) + { +@@ -4278,14 +4299,24 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, + break; + } + GETCHARLENTEST(fc, Feptr, len); +- cp = PRIV(ucd_caseless_sets) + Lpropvalue; +- for (;;) ++#if PCRE2_CODE_UNIT_WIDTH == 32 ++ if (fc > MAX_UTF_CODE_POINT) + { +- if (fc < *cp) +- { if (Lctype == OP_NOTPROP) break; else goto GOT_MAX; } +- if (fc == *cp++) +- { if (Lctype == OP_NOTPROP) goto GOT_MAX; else break; } ++ if (Lctype != OP_NOTPROP) goto GOT_MAX; + } ++ else ++#endif ++ { ++ cp = PRIV(ucd_caseless_sets) + Lpropvalue; ++ for (;;) ++ { ++ if (fc < *cp) ++ { if (Lctype == OP_NOTPROP) break; else goto GOT_MAX; } ++ if (fc == *cp++) ++ { if (Lctype == OP_NOTPROP) goto GOT_MAX; else break; } ++ } ++ } ++ + Feptr += len; + } + GOT_MAX: +diff --git a/testdata/testinput12 b/testdata/testinput12 +index de3d406..85550c3 100644 +--- a/testdata/testinput12 ++++ 
b/testdata/testinput12 +@@ -576,5 +576,31 @@ + # This used to loop in 32-bit mode; it will fail in 16-bit mode. + /[\x{ffffffff}]/caseless,ucp + \x{ffffffff}xyz ++ ++# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They ++# will give errors in 16-bit mode. ++ ++/k*\x{ffffffff}/caseless,ucp ++ \x{ffffffff} ++ ++/k+\x{ffffffff}/caseless,ucp,no_start_optimize ++ K\x{ffffffff} ++\= Expect no match ++ \x{ffffffff}\x{ffffffff} ++ ++/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize ++\= Expect no match ++ \x{ffffffff}\x{ffffffff}\x{ffffffff} ++ ++/k\x{ffffffff}/caseless,ucp,no_start_optimize ++ K\x{ffffffff} ++\= Expect no match ++ \x{ffffffff}\x{ffffffff}\x{ffffffff} ++ ++/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess ++\= Expect no match ++ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z ++ ++# --------------------------------------------------------- + + # End of testinput12 +diff --git a/testdata/testinput14 b/testdata/testinput14 +index 8a17ae7..8880b5c 100644 +--- a/testdata/testinput14 ++++ b/testdata/testinput14 +@@ -78,4 +78,31 @@ + + # ---------------------------------------------------- + ++# ---------------------------------------------------- ++# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit ++# mode; for the other widths they will fail. 
++ ++/k*\x{ffffffff}/caseless,ucp ++ \x{ffffffff} ++ ++/k+\x{ffffffff}/caseless,ucp,no_start_optimize ++ K\x{ffffffff} ++\= Expect no match ++ \x{ffffffff}\x{ffffffff} ++ ++/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize ++\= Expect no match ++ \x{ffffffff}\x{ffffffff}\x{ffffffff} ++ ++/k\x{ffffffff}/caseless,ucp,no_start_optimize ++ K\x{ffffffff} ++\= Expect no match ++ \x{ffffffff}\x{ffffffff}\x{ffffffff} ++ ++/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess ++\= Expect no match ++ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z ++ ++# ---------------------------------------------------- ++ + # End of testinput14 +diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 +index 9fa93fa..616d693 100644 +--- a/testdata/testoutput12-16 ++++ b/testdata/testoutput12-16 +@@ -1827,5 +1827,42 @@ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is to + /[\x{ffffffff}]/caseless,ucp + Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large + \x{ffffffff}xyz ++ ++# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They ++# will give errors in 16-bit mode. 
++ ++/k*\x{ffffffff}/caseless,ucp ++Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large ++ \x{ffffffff} ++ ++/k+\x{ffffffff}/caseless,ucp,no_start_optimize ++Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large ++ K\x{ffffffff} ++\= Expect no match ++ \x{ffffffff}\x{ffffffff} ++ ++/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize ++Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large ++\= Expect no match ++ \x{ffffffff}\x{ffffffff}\x{ffffffff} ++ ++/k\x{ffffffff}/caseless,ucp,no_start_optimize ++Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large ++ K\x{ffffffff} ++\= Expect no match ++ \x{ffffffff}\x{ffffffff}\x{ffffffff} ++ ++/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess ++\= Expect no match ++ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z ++** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. ++** Truncation will probably give the wrong result. ++** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. ++** Truncation will probably give the wrong result. ++** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. ++** Truncation will probably give the wrong result. ++No match ++ ++# --------------------------------------------------------- + + # End of testinput12 +diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 +index 721d8bc..3c9586e 100644 +--- a/testdata/testoutput12-32 ++++ b/testdata/testoutput12-32 +@@ -1821,5 +1821,38 @@ No match + /[\x{ffffffff}]/caseless,ucp + \x{ffffffff}xyz + 0: \x{ffffffff} ++ ++# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They ++# will give errors in 16-bit mode. 
++ ++/k*\x{ffffffff}/caseless,ucp ++ \x{ffffffff} ++ 0: \x{ffffffff} ++ ++/k+\x{ffffffff}/caseless,ucp,no_start_optimize ++ K\x{ffffffff} ++ 0: K\x{ffffffff} ++\= Expect no match ++ \x{ffffffff}\x{ffffffff} ++No match ++ ++/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize ++\= Expect no match ++ \x{ffffffff}\x{ffffffff}\x{ffffffff} ++No match ++ ++/k\x{ffffffff}/caseless,ucp,no_start_optimize ++ K\x{ffffffff} ++ 0: K\x{ffffffff} ++\= Expect no match ++ \x{ffffffff}\x{ffffffff}\x{ffffffff} ++No match ++ ++/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess ++\= Expect no match ++ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z ++No match ++ ++# --------------------------------------------------------- + + # End of testinput12 +diff --git a/testdata/testoutput14-16 b/testdata/testoutput14-16 +index 61541f6..dd1a977 100644 +--- a/testdata/testoutput14-16 ++++ b/testdata/testoutput14-16 +@@ -122,4 +122,42 @@ No match + + # ---------------------------------------------------- + ++# ---------------------------------------------------- ++# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit ++# mode; for the other widths they will fail. 
++ ++/k*\x{ffffffff}/caseless,ucp ++Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large ++ \x{ffffffff} ++ ++/k+\x{ffffffff}/caseless,ucp,no_start_optimize ++Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large ++ K\x{ffffffff} ++\= Expect no match ++ \x{ffffffff}\x{ffffffff} ++ ++/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize ++Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large ++\= Expect no match ++ \x{ffffffff}\x{ffffffff}\x{ffffffff} ++ ++/k\x{ffffffff}/caseless,ucp,no_start_optimize ++Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large ++ K\x{ffffffff} ++\= Expect no match ++ \x{ffffffff}\x{ffffffff}\x{ffffffff} ++ ++/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess ++\= Expect no match ++ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z ++** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. ++** Truncation will probably give the wrong result. ++** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. ++** Truncation will probably give the wrong result. ++** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. ++** Truncation will probably give the wrong result. ++No match ++ ++# ---------------------------------------------------- ++ + # End of testinput14 +diff --git a/testdata/testoutput14-32 b/testdata/testoutput14-32 +index f1f65b7..dc21569 100644 +--- a/testdata/testoutput14-32 ++++ b/testdata/testoutput14-32 +@@ -122,4 +122,38 @@ No match + + # ---------------------------------------------------- + ++# ---------------------------------------------------- ++# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit ++# mode; for the other widths they will fail. 
++ ++/k*\x{ffffffff}/caseless,ucp ++ \x{ffffffff} ++ 0: \x{ffffffff} ++ ++/k+\x{ffffffff}/caseless,ucp,no_start_optimize ++ K\x{ffffffff} ++ 0: K\x{ffffffff} ++\= Expect no match ++ \x{ffffffff}\x{ffffffff} ++No match ++ ++/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize ++\= Expect no match ++ \x{ffffffff}\x{ffffffff}\x{ffffffff} ++No match ++ ++/k\x{ffffffff}/caseless,ucp,no_start_optimize ++ K\x{ffffffff} ++ 0: K\x{ffffffff} ++\= Expect no match ++ \x{ffffffff}\x{ffffffff}\x{ffffffff} ++No match ++ ++/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess ++\= Expect no match ++ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z ++No match ++ ++# ---------------------------------------------------- ++ + # End of testinput14 +diff --git a/testdata/testoutput14-8 b/testdata/testoutput14-8 +index aa62414..69285db 100644 +--- a/testdata/testoutput14-8 ++++ b/testdata/testoutput14-8 +@@ -122,4 +122,42 @@ Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too + + # ---------------------------------------------------- + ++# ---------------------------------------------------- ++# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit ++# mode; for the other widths they will fail. 
++ ++/k*\x{ffffffff}/caseless,ucp ++Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large ++ \x{ffffffff} ++ ++/k+\x{ffffffff}/caseless,ucp,no_start_optimize ++Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large ++ K\x{ffffffff} ++\= Expect no match ++ \x{ffffffff}\x{ffffffff} ++ ++/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize ++Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large ++\= Expect no match ++ \x{ffffffff}\x{ffffffff}\x{ffffffff} ++ ++/k\x{ffffffff}/caseless,ucp,no_start_optimize ++Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large ++ K\x{ffffffff} ++\= Expect no match ++ \x{ffffffff}\x{ffffffff}\x{ffffffff} ++ ++/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess ++\= Expect no match ++ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z ++** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled. ++** Truncation will probably give the wrong result. ++** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled. ++** Truncation will probably give the wrong result. ++** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled. ++** Truncation will probably give the wrong result. 
++No match ++ ++# ---------------------------------------------------- ++ + # End of testinput14 +-- +2.23.0 + diff --git a/backport-Fix-incorrect-patch-in-c1306126.patch b/backport-Fix-incorrect-patch-in-c1306126.patch new file mode 100644 index 0000000000000000000000000000000000000000..4989d7bb966a7a9ebd488a5bff0fbe069da4bbc0 --- /dev/null +++ b/backport-Fix-incorrect-patch-in-c1306126.patch @@ -0,0 +1,97 @@ +From 7fe586b892c9e0cbf3b21d57cfd8135e2311e45c Mon Sep 17 00:00:00 2001 +From: Philip Hazel +Date: Mon, 20 Nov 2023 15:41:06 +0000 +Subject: [PATCH] Fix incorrect patch in c1306126 + +Conflict:adapt context +Reference:https://github.com/PCRE2Project/pcre2/commit/7fe586b892c9e0cbf3b21d57cfd8135e2311e45c + +--- + src/pcre2_compile.c | 20 ++++++++++++++------ + testdata/testinput2 | 2 ++ + testdata/testoutput2 | 2 ++ + 3 files changed, 18 insertions(+), 6 deletions(-) + +diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c +index fdaf2ad..9e45580 100644 +--- a/src/pcre2_compile.c ++++ b/src/pcre2_compile.c +@@ -2782,6 +2782,7 @@ uint32_t *previous_callout = NULL; + uint32_t *parsed_pattern = cb->parsed_pattern; + uint32_t *parsed_pattern_end = cb->parsed_pattern_end; + uint32_t *this_parsed_item = NULL; ++uint32_t *prev_parsed_item = NULL; + uint32_t meta_quantifier = 0; + uint32_t add_after_mark = 0; + uint32_t extra_options = cb->cx->extra_options; +@@ -2867,11 +2868,10 @@ while (ptr < ptrend) + uint32_t set, unset, *optset; + uint32_t terminator; + uint32_t prev_meta_quantifier; +- uint32_t *prev_parsed_item = this_parsed_item; + BOOL prev_okquantifier; + PCRE2_SPTR tempptr; + PCRE2_SIZE offset; +- ++ + if (parsed_pattern >= parsed_pattern_end) + { + errorcode = ERR63; /* Internal error (parsed pattern overflow) */ +@@ -2883,10 +2883,17 @@ while (ptr < ptrend) + errorcode = ERR19; + goto FAILED; /* Parentheses too deeply nested */ + } +- +- /* Remember where this item started */ + +- this_parsed_item = parsed_pattern; ++ /* If the last time round this loop 
something was added, parsed_pattern will ++ no longer be equal to this_parsed_item. Remember where the previous item ++ started and reset for the next item. Note that sometimes round the loop, ++ nothing gets added (e.g. for ignored white space). */ ++ ++ if (this_parsed_item != parsed_pattern) ++ { ++ prev_parsed_item = this_parsed_item; ++ this_parsed_item = parsed_pattern; ++ } + + /* Get next input character, save its position for callout handling. */ + +@@ -3440,7 +3447,8 @@ while (ptr < ptrend) + + /* ---- Quantifier post-processing ---- */ + +- /* Check that a quantifier is allowed after the previous item. */ ++ /* Check that a quantifier is allowed after the previous item. This ++ guarantees that there is a previous item. */ + + CHECK_QUANTIFIER: + if (!prev_okquantifier) +diff --git a/testdata/testinput2 b/testdata/testinput2 +index ba292d8..da845c1 100644 +--- a/testdata/testinput2 ++++ b/testdata/testinput2 +@@ -6051,4 +6051,6 @@ a)"xI + -- + \[X]{-10} + ++/A +/extended ++ + # End of testinput2 +diff --git a/testdata/testoutput2 b/testdata/testoutput2 +index 888f06a..85de4ae 100644 +--- a/testdata/testoutput2 ++++ b/testdata/testoutput2 +@@ -17932,6 +17932,8 @@ No match + \[X]{-10} + ** Zero or negative repeat not allowed + ++/A +/extended ++ + # End of testinput2 + Error -70: PCRE2_ERROR_BADDATA (unknown error number) + Error -62: bad serialized data +-- +2.23.0 + diff --git a/backport-Fix-oversight-in-DFA-when-changing-OP_REVERSE-also-a.patch b/backport-Fix-oversight-in-DFA-when-changing-OP_REVERSE-also-a.patch new file mode 100644 index 0000000000000000000000000000000000000000..10ab2146b8bd10704d5576ea43794e588275b441 --- /dev/null +++ b/backport-Fix-oversight-in-DFA-when-changing-OP_REVERSE-also-a.patch @@ -0,0 +1,29 @@ +From b88126f42382fa470b6480f82489303d4311ce18 Mon Sep 17 00:00:00 2001 +From: Philip Hazel +Date: Thu, 16 Nov 2023 13:49:49 +0000 +Subject: [PATCH] Fix oversight in DFA when changing OP_REVERSE; also add some + unrelated tests + 
+Conflict:don't add unrelated tests +Reference:https://github.com/PCRE2Project/pcre2/commit/b88126f42382fa470b6480f82489303d4311ce18 + +--- + src/pcre2_dfa_match.c | 2 +- + 1 files changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c +index e90c984..5768407 100644 +--- a/src/pcre2_dfa_match.c ++++ b/src/pcre2_dfa_match.c +@@ -591,7 +591,7 @@ if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT) + end_code = this_start_code; + do + { +- size_t back = (size_t)GET(end_code, 2+LINK_SIZE); ++ size_t back = (size_t)GET2(end_code, 2+LINK_SIZE); + if (back > max_back) max_back = back; + end_code += GET(end_code, 1); + } +-- +2.23.0 + diff --git a/backport-Fix-z-behaviour-when-matching-within-invalid-UTF.patch b/backport-Fix-z-behaviour-when-matching-within-invalid-UTF.patch new file mode 100644 index 0000000000000000000000000000000000000000..59dd5c24ccd5f07c230684c724b44b3053813537 --- /dev/null +++ b/backport-Fix-z-behaviour-when-matching-within-invalid-UTF.patch @@ -0,0 +1,108 @@ +From 05206d66340341bef7a673108a855f594c148950 Mon Sep 17 00:00:00 2001 +From: Philip Hazel +Date: Sun, 19 Nov 2023 18:32:10 +0000 +Subject: [PATCH] Fix \z behaviour when matching within invalid UTF + +Conflict:don't modify ChangeLog; adapt context +Reference:https://github.com/PCRE2Project/pcre2/commit/05206d66340341bef7a673108a855f594c148950 + +--- + src/pcre2_match.c | 6 ++++-- + testdata/testinput10 | 3 +++ + testdata/testinput12 | 3 +++ + testdata/testoutput10 | 4 ++++ + testdata/testoutput12-16 | 4 ++++ + testdata/testoutput12-32 | 4 ++++ + 6 files changed, 22 insertions(+), 2 deletions(-) + +diff --git a/src/pcre2_match.c b/src/pcre2_match.c +index 2dcf8c4..ea03976 100644 +--- a/src/pcre2_match.c ++++ b/src/pcre2_match.c +@@ -6076,10 +6076,12 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, + if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS; + + /* Fall through */ +- 
/* Unconditional end of subject assertion (\z) */ ++ /* Unconditional end of subject assertion (\z). We must check NOTEOL ++ because it gets set for invalid UTF fragments. */ + + case OP_EOD: +- if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH); ++ if (Feptr < mb->end_subject || (mb->moptions & PCRE2_NOTEOL) != 0) ++ RRETURN(MATCH_NOMATCH); + if (mb->partial != 0) + { + mb->hitend = TRUE; +diff --git a/testdata/testinput10 b/testdata/testinput10 +index c7618b1..e901d51 100644 +--- a/testdata/testinput10 ++++ b/testdata/testinput10 +@@ -642,4 +642,7 @@ + qchq\=ph + qchq\=ps + ++/A\z/utf,match_invalid_utf ++ A\x80\x42\n ++ + # End of testinput10 +diff --git a/testdata/testinput12 b/testdata/testinput12 +index 1e552e6..5a2d8d2 100644 +--- a/testdata/testinput12 ++++ b/testdata/testinput12 +@@ -464,6 +464,9 @@ + + /aa/utf,ucp,match_invalid_utf,global + \x{d800}aa ++ ++/A\z/utf,match_invalid_utf ++ A\x{df00}\n + + # ---------------------------------------------------- + +diff --git a/testdata/testoutput10 b/testdata/testoutput10 +index 18dd9d2..8145891 100644 +--- a/testdata/testoutput10 ++++ b/testdata/testoutput10 +@@ -1921,4 +1921,8 @@ Partial match: + qchq\=ps + Partial match: + ++/A\z/utf,match_invalid_utf ++ A\x80\x42\n ++No match ++ + # End of testinput10 +diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 +index 8cbc13d..9ac403e 100644 +--- a/testdata/testoutput12-16 ++++ b/testdata/testoutput12-16 +@@ -1607,6 +1607,10 @@ No match + /aa/utf,ucp,match_invalid_utf,global + \x{d800}aa + 0: aa ++ ++/A\z/utf,match_invalid_utf ++ A\x{df00}\n ++No match + + # ---------------------------------------------------- + +diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 +index 1a98b4b..9396305 100644 +--- a/testdata/testoutput12-32 ++++ b/testdata/testoutput12-32 +@@ -1605,6 +1605,10 @@ No match + /aa/utf,ucp,match_invalid_utf,global + \x{d800}aa + 0: aa ++ ++/A\z/utf,match_invalid_utf ++ A\x{df00}\n ++No match + + # 
---------------------------------------------------- + +-- +2.23.0 + diff --git a/pcre2.spec b/pcre2.spec index c30aa072d2d77df00dfac5c39efb30f63e781f50..38913476c127e3fb5acfca68a3be4c1d154d33fe 100644 --- a/pcre2.spec +++ b/pcre2.spec @@ -1,6 +1,6 @@ Name: pcre2 Version: 10.35 -Release: 6 +Release: 7 Summary: Perl Compatible Regular Expressions License: BSD URL: http://www.pcre.org/ @@ -37,6 +37,18 @@ Patch6020: backport-jit-fail-early-in-ffcps_-if-subject-shorter-than-off.pat Patch6021: backport-jit-fix-pcre2_jit_free_unused_memory-if-sljit-not-us.patch Patch6022: backport-fix-CVE-2022-41409.patch Patch6023: backport-fix-a-possible-integer-overflow-in-DFA-matching-305.patch +Patch6024: backport-Fix-oversight-in-DFA-when-changing-OP_REVERSE-also-a.patch +Patch6025: backport-Fix-32-bit-quantifier-following-a-character-larger-t.patch +Patch6026: backport-Fix-z-behaviour-when-matching-within-invalid-UTF.patch +Patch6027: backport-Fix-incorrect-patch-in-c1306126.patch +Patch6028: backport-Fix-another-oversight-in-c1306126.patch +Patch6029: backport-Fix-X-matching-in-32-bit-mode-without-UTF-in-JIT.patch +Patch6030: backport-Fix-bad-patch-in-05206d66.-The-interpreter-was-handl.patch +Patch6031: backport-Fix-backref-iterators-when-PCRE2_MATCH_UNSET_BACKREF.patch +Patch6032: backport-Fix-compile-loop-in-32-bit-mode-for-characters-above.patch +Patch6033: backport-Fix-incorrect-matching-of-0xffffffff-to-any-characte.patch +Patch6034: backport-Fix-accept-and-endanchored-interaction-in-JIT.patch +Patch6035: backport-Fix-backreferences-with-unset-backref-and-non-greedy.patch BuildRequires: autoconf libtool automake coreutils gcc make readline-devel Obsoletes: pcre2-utf16 pcre2-utf32 pcre2-tools @@ -153,6 +165,9 @@ make check %{_pkgdocdir}/html/ %changelog +* Mon Jan 22 2024 xujing - 10.35-7 +- DESC:sync patches from upstream to fix some bugs + * Thu Dec 14 2023 xujing - 10.35-6 - DESC:fix a possible integer overflow in DFA matching (#305)