From 47c5e7ad9541c63188cea8243651d94164a87b96 Mon Sep 17 00:00:00 2001 From: yixiangzhike Date: Fri, 22 Aug 2025 15:55:12 +0800 Subject: [PATCH] Avoid truncation of error offset (cherry picked from commit f49651068630bb1fbe431bd4af6a7892c15fd709) --- ...valid_utf-to-avoid-truncation-of-err.patch | 198 ++++++++++++++++++ pcre2.spec | 8 +- 2 files changed, 204 insertions(+), 2 deletions(-) create mode 100644 backport-minor-tweaks-to-valid_utf-to-avoid-truncation-of-err.patch diff --git a/backport-minor-tweaks-to-valid_utf-to-avoid-truncation-of-err.patch b/backport-minor-tweaks-to-valid_utf-to-avoid-truncation-of-err.patch new file mode 100644 index 0000000..f6f3e8e --- /dev/null +++ b/backport-minor-tweaks-to-valid_utf-to-avoid-truncation-of-err.patch @@ -0,0 +1,198 @@ +From fdd3ce7e2ad51f38d7c7a47c92f2aa46b290a0f7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= +Date: Tue, 17 Jan 2023 06:43:20 -0800 +Subject: [PATCH] minor tweaks to valid_utf() to avoid truncation of error + offset (#181) + +e8cdae3 (Correct an incorrect cast., 2017-04-14) started changing +some of the casts to fit the type of the error offset variable, so +complete that, and for consistency, add the same type of casts to +the non UTF-8 code. +--- + src/pcre2_valid_utf.c | 48 +++++++++++++++++++++---------------------- + 1 file changed, 24 insertions(+), 24 deletions(-) + +diff --git a/src/pcre2_valid_utf.c b/src/pcre2_valid_utf.c +index e47ea78f..de411b91 100644 +--- a/src/pcre2_valid_utf.c ++++ b/src/pcre2_valid_utf.c +@@ -171,7 +171,7 @@ for (p = string; length > 0; p++) + + if (((d = *(++p)) & 0xc0) != 0x80) + { +- *erroroffset = (int)(p - string) - 1; ++ *erroroffset = (PCRE2_SIZE)(p - string) - 1; + return PCRE2_ERROR_UTF8_ERR6; + } + +@@ -186,7 +186,7 @@ for (p = string; length > 0; p++) + + case 1: if ((c & 0x3e) == 0) + { +- *erroroffset = (int)(p - string) - 1; ++ *erroroffset = (PCRE2_SIZE)(p - string) - 1; + return PCRE2_ERROR_UTF8_ERR15; + } + break; +@@ -198,17 +198,17 @@ for (p = string; length > 0; p++) + case 2: + if ((*(++p) & 0xc0) != 0x80) /* Third byte */ + { +- *erroroffset = (int)(p - string) - 2; ++ *erroroffset = (PCRE2_SIZE)(p - string) - 2; + return PCRE2_ERROR_UTF8_ERR7; + } + if (c == 0xe0 && (d & 0x20) == 0) + { +- *erroroffset = (int)(p - string) - 2; ++ *erroroffset = (PCRE2_SIZE)(p - string) - 2; + return PCRE2_ERROR_UTF8_ERR16; + } + if (c == 0xed && d >= 0xa0) + { +- *erroroffset = (int)(p - string) - 2; ++ *erroroffset = (PCRE2_SIZE)(p - string) - 2; + return PCRE2_ERROR_UTF8_ERR14; + } + break; +@@ -220,22 +220,22 @@ for (p = string; length > 0; p++) + case 3: + if ((*(++p) & 0xc0) != 0x80) /* Third byte */ + { +- *erroroffset = (int)(p - string) - 2; ++ *erroroffset = (PCRE2_SIZE)(p - string) - 2; + return PCRE2_ERROR_UTF8_ERR7; + } + if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */ + { +- *erroroffset = (int)(p - string) - 3; ++ *erroroffset = (PCRE2_SIZE)(p - string) - 3; + return PCRE2_ERROR_UTF8_ERR8; + } + if (c == 0xf0 && (d & 0x30) == 0) + { +- *erroroffset = (int)(p - string) - 3; ++ *erroroffset = (PCRE2_SIZE)(p - string) - 3; + return PCRE2_ERROR_UTF8_ERR17; + } + if (c > 0xf4 || (c == 0xf4 && d > 0x8f)) + { +- *erroroffset = (int)(p - string) - 3; ++ *erroroffset = (PCRE2_SIZE)(p - string) - 3; + return PCRE2_ERROR_UTF8_ERR13; + } + break; +@@ -251,22 +251,22 @@ for (p = string; length > 0; p++) + case 4: + if ((*(++p) & 0xc0) != 0x80) /* Third byte */ + { +- *erroroffset = (int)(p - string) - 2; ++ *erroroffset = (PCRE2_SIZE)(p - string) - 2; + return PCRE2_ERROR_UTF8_ERR7; + } + if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */ + { +- *erroroffset = (int)(p - string) - 3; ++ *erroroffset = (PCRE2_SIZE)(p - string) - 3; + return PCRE2_ERROR_UTF8_ERR8; + } + if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */ + { +- *erroroffset = (int)(p - string) - 4; ++ *erroroffset = (PCRE2_SIZE)(p - string) - 4; + return PCRE2_ERROR_UTF8_ERR9; + } + if (c == 0xf8 && (d & 0x38) == 0) + { +- *erroroffset = (int)(p - string) - 4; ++ *erroroffset = (PCRE2_SIZE)(p - string) - 4; + return PCRE2_ERROR_UTF8_ERR18; + } + break; +@@ -277,27 +277,27 @@ for (p = string; length > 0; p++) + case 5: + if ((*(++p) & 0xc0) != 0x80) /* Third byte */ + { +- *erroroffset = (int)(p - string) - 2; ++ *erroroffset = (PCRE2_SIZE)(p - string) - 2; + return PCRE2_ERROR_UTF8_ERR7; + } + if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */ + { +- *erroroffset = (int)(p - string) - 3; ++ *erroroffset = (PCRE2_SIZE)(p - string) - 3; + return PCRE2_ERROR_UTF8_ERR8; + } + if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */ + { +- *erroroffset = (int)(p - string) - 4; ++ *erroroffset = (PCRE2_SIZE)(p - string) - 4; + return PCRE2_ERROR_UTF8_ERR9; + } + if ((*(++p) & 0xc0) != 0x80) /* Sixth byte */ + { +- *erroroffset = (int)(p - string) - 5; ++ *erroroffset = (PCRE2_SIZE)(p - string) - 5; + return PCRE2_ERROR_UTF8_ERR10; + } + if (c == 0xfc && (d & 0x3c) == 0) + { +- *erroroffset = (int)(p - string) - 5; ++ *erroroffset = (PCRE2_SIZE)(p - string) - 5; + return PCRE2_ERROR_UTF8_ERR19; + } + break; +@@ -309,7 +309,7 @@ for (p = string; length > 0; p++) + + if (ab > 3) + { +- *erroroffset = (int)(p - string) - ab; ++ *erroroffset = (PCRE2_SIZE)(p - string) - ab; + return (ab == 4)? PCRE2_ERROR_UTF8_ERR11 : PCRE2_ERROR_UTF8_ERR12; + } + } +@@ -340,21 +340,21 @@ for (p = string; length > 0; p++) + /* High surrogate. Must be a followed by a low surrogate. */ + if (length == 0) + { +- *erroroffset = p - string; ++ *erroroffset = (PCRE2_SIZE)(p - string); + return PCRE2_ERROR_UTF16_ERR1; + } + p++; + length--; + if ((*p & 0xfc00) != 0xdc00) + { +- *erroroffset = p - string - 1; ++ *erroroffset = (PCRE2_SIZE)(p - string) - 1; + return PCRE2_ERROR_UTF16_ERR2; + } + } + else + { + /* Isolated low surrogate. Always an error. */ +- *erroroffset = p - string; ++ *erroroffset = (PCRE2_SIZE)(p - string); + return PCRE2_ERROR_UTF16_ERR3; + } + } +@@ -379,14 +379,14 @@ for (p = string; length > 0; length--, p++) + /* Normal UTF-32 code point. Neither high nor low surrogate. */ + if (c > 0x10ffffu) + { +- *erroroffset = p - string; ++ *erroroffset = (PCRE2_SIZE)(p - string); + return PCRE2_ERROR_UTF32_ERR2; + } + } + else + { + /* A surrogate */ +- *erroroffset = p - string; ++ *erroroffset = (PCRE2_SIZE)(p - string); + return PCRE2_ERROR_UTF32_ERR1; + } + } +-- +2.43.0 + diff --git a/pcre2.spec b/pcre2.spec index ae2da0c..ab6aa88 100644 --- a/pcre2.spec +++ b/pcre2.spec @@ -1,6 +1,6 @@ Name: pcre2 Version: 10.42 -Release: 13 +Release: 14 Summary: Perl Compatible Regular Expressions License: BSD URL: http://www.pcre.org/ @@ -46,6 +46,7 @@ Patch6034: backport-Improve-error-offsets-for-character-classes-548.patch Patch6035: backport-Non-recursive-scan-prefix-in-JIT-560.patch Patch6036: backport-Mend-a-bug-in-pcre2grep-that-caused-separator-lines-.patch Patch6037: backport-Fix-oversight-in-adding-new-pcre2grep-test.patch +Patch6038: backport-minor-tweaks-to-valid_utf-to-avoid-truncation-of-err.patch BuildRequires: autoconf libtool automake coreutils gcc make readline-devel Obsoletes: pcre2-utf16 pcre2-utf32 pcre2-tools @@ -163,7 +164,10 @@ make check %{_pkgdocdir}/html/ %changelog -*Thu Mar 13 2025 Linux_zhang - 10.42-13 +* Fri Aug 22 2025 yixiangzhike - 10.42-14 +- DESC:sync patch from upstream to avoid truncation of error offset + +* Thu Mar 13 2025 Linux_zhang - 10.42-13 - DESC:sync patches from upstream to fix a bug in pcre2grep * Tue Dec 10 2024 hugel - 10.42-12 -- Gitee