From a6089462a460a9f6c2db63a86e1c09fabaa81499 Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Wed, 1 Feb 2023 17:42:29 +0000
Subject: [PATCH] Additional PCRE2_EXTRA_ASCII_xxx code
Conflict:NA
Reference:https://github.com/PCRE2Project/pcre2/commit/a6089462a460a9f6c2db63a86e1c09fabaa81499
---
src/pcre2.h.in | 4 +
src/pcre2_compile.c | 375 ++++++++++++++++++++++++++-----------------
src/pcre2test.c | 21 ++-
testdata/testinput5 | 133 +++++++++++++++
testdata/testinput7 | 133 +++++++++++++++
testdata/testoutput5 | 179 +++++++++++++++++++++
testdata/testoutput7 | 179 +++++++++++++++++++++
7 files changed, 869 insertions(+), 155 deletions(-)
diff --git a/src/pcre2.h.in b/src/pcre2.h.in
index 11419a38..7202c633 100644
--- a/src/pcre2.h.in
+++ b/src/pcre2.h.in
@@ -154,6 +154,10 @@ D is inspected during pcre2_dfa_match() execution
#define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */
#define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK 0x00000040u /* C */
#define PCRE2_EXTRA_CASELESS_RESTRICT 0x00000080u /* C */
+#define PCRE2_EXTRA_ASCII_BSD 0x00000100u /* C */
+#define PCRE2_EXTRA_ASCII_BSS 0x00000200u /* C */
+#define PCRE2_EXTRA_ASCII_BSW 0x00000400u /* C */
+#define PCRE2_EXTRA_ASCII_POSIX 0x00000800u /* C */
/* These are for pcre2_jit_compile(). */
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index ed2fe8a7..b8a9e098 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -123,7 +123,7 @@ static unsigned int
#endif
static int
- compile_regex(uint32_t, uint32_t, PCRE2_UCHAR **, uint32_t **, int *,
+ compile_regex(uint32_t, uint32_t, PCRE2_UCHAR **, uint32_t **, int *,
uint32_t, uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *,
compile_block *, PCRE2_SIZE *);
@@ -694,8 +694,8 @@ static uint32_t chartypeoffset[] = {
now all in a single string, to reduce the number of relocations when a shared
library is dynamically loaded. The list of lengths is terminated by a zero
length entry. The first three must be alpha, lower, upper, as this is assumed
-for handling case independence. The indices for graph, print, and punct are
-needed, so identify them. */
+for handling case independence. The indices for several classes are needed, so
+identify them. */
static const char posix_names[] =
STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0
@@ -785,7 +785,8 @@ are allowed. */
(PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS| \
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES|PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL| \
PCRE2_EXTRA_ESCAPED_CR_IS_LF|PCRE2_EXTRA_ALT_BSUX| \
- PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)
+ PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK|PCRE2_EXTRA_ASCII_BSD| \
+ PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX)
/* Compile time error code numbers. They are given names so that they can more
easily be tracked. When a new number is added, the tables called eint1 and
@@ -1059,9 +1060,9 @@ for (;;)
case META_SKIP: fprintf(stderr, "META (*SKIP)"); break;
case META_THEN: fprintf(stderr, "META (*THEN)"); break;
- case META_OPTIONS:
- fprintf(stderr, "META_OPTIONS 0x%08x 0x%08x", pptr[0], pptr[1]);
- pptr += 2;
+ case META_OPTIONS:
+ fprintf(stderr, "META_OPTIONS 0x%08x 0x%08x", pptr[0], pptr[1]);
+ pptr += 2;
break;
case META_LOOKBEHIND:
@@ -1494,7 +1495,7 @@ Arguments:
chptr points to a returned data character
errorcodeptr points to the errorcode variable (containing zero)
options the current options bits
- xoptions the current extra options bits
+ xoptions the current extra options bits
isclass TRUE if inside a character class
cb compile data block or NULL when called from pcre2_substitute()
@@ -2536,6 +2537,85 @@ return parsed_pattern;
+/*************************************************
+* Handle \d, \D, \s, \S, \w, \W *
+*************************************************/
+
+/* This function is called from parse_regex() below, both for freestanding
+escapes, and those within classes, to handle those escapes that may change when
+Unicode property support is requested. Note that PCRE2_UCP will never be set
+without Unicode support because that is checked when pcre2_compile() is called.
+
+Arguments:
+ escape the ESC_... value
+ parsed_pattern where to add the code
+ options options bits
+ xoptions extra options bits
+
+Returns: updated value of parsed_pattern
+*/
+static uint32_t *
+handle_escdsw(int escape, uint32_t *parsed_pattern, uint32_t options,
+ uint32_t xoptions)
+{
+uint32_t ascii_option = 0; /* Extra-option bit that forces ASCII for escape */
+uint32_t prop = ESC_p; /* Positive property; negated escapes use ESC_P */
+
+switch(escape) /* Select the matching ASCII override bit and polarity */
+ {
+ case ESC_D:
+ prop = ESC_P;
+ /* Fall through */
+ case ESC_d:
+ ascii_option = PCRE2_EXTRA_ASCII_BSD;
+ break;
+
+ case ESC_S:
+ prop = ESC_P;
+ /* Fall through */
+ case ESC_s:
+ ascii_option = PCRE2_EXTRA_ASCII_BSS;
+ break;
+
+ case ESC_W:
+ prop = ESC_P;
+ /* Fall through */
+ case ESC_w:
+ ascii_option = PCRE2_EXTRA_ASCII_BSW;
+ break;
+ } /* Any other value leaves ascii_option zero; callers pass only these six */
+
+if ((options & PCRE2_UCP) == 0 || (xoptions & ascii_option) != 0)
+ {
+ *parsed_pattern++ = META_ESCAPE + escape; /* Keep the plain escape */
+ }
+else
+ {
+ *parsed_pattern++ = META_ESCAPE + prop; /* Property test replaces escape */
+ switch(escape) /* Second word: type in the upper 16 bits, value below */
+ {
+ case ESC_d:
+ case ESC_D:
+ *parsed_pattern++ = (PT_PC << 16) | ucp_Nd;
+ break;
+
+ case ESC_s:
+ case ESC_S:
+ *parsed_pattern++ = PT_SPACE << 16;
+ break;
+
+ case ESC_w:
+ case ESC_W:
+ *parsed_pattern++ = PT_WORD << 16;
+ break;
+ }
+ }
+
+return parsed_pattern; /* Points just past the one or two words written */
+}
+
+
+
/*************************************************
* Parse regex and identify named groups *
*************************************************/
@@ -2564,7 +2644,7 @@ typedef struct nest_save {
uint16_t max_group;
uint16_t flags;
uint32_t options;
- uint32_t xoptions;
+ uint32_t xoptions;
} nest_save;
#define NSF_RESET 0x0001u
@@ -2579,8 +2659,11 @@ the main compiling phase. */
#define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \
PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \
PCRE2_UNGREEDY)
-
-#define PARSE_TRACKED_EXTRA_OPTIONS (PCRE2_EXTRA_CASELESS_RESTRICT)
+
+/* ASCII_POSIX is tracked too, so that (?aP) is undone at group end. */
+#define PARSE_TRACKED_EXTRA_OPTIONS (PCRE2_EXTRA_CASELESS_RESTRICT| \
+ PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW| \
+ PCRE2_EXTRA_ASCII_POSIX)
/* States used for analyzing ranges in character classes. The two OK values
must be last. */
@@ -3115,9 +3198,7 @@ while (ptr < ptrend)
*parsed_pattern++ = META_ESCAPE + escape;
break;
- /* Escapes that change in UCP mode. Note that PCRE2_UCP will never be set
- without Unicode support because it is checked when pcre2_compile() is
- called. */
+ /* Escapes that may change in UCP mode. */
case ESC_d:
case ESC_D:
@@ -3126,33 +3207,8 @@ while (ptr < ptrend)
case ESC_w:
case ESC_W:
okquantifier = TRUE;
- if ((options & PCRE2_UCP) == 0)
- {
- *parsed_pattern++ = META_ESCAPE + escape;
- }
- else
- {
- *parsed_pattern++ = META_ESCAPE +
- ((escape == ESC_d || escape == ESC_s || escape == ESC_w)?
- ESC_p : ESC_P);
- switch(escape)
- {
- case ESC_d:
- case ESC_D:
- *parsed_pattern++ = (PT_PC << 16) | ucp_Nd;
- break;
-
- case ESC_s:
- case ESC_S:
- *parsed_pattern++ = PT_SPACE << 16;
- break;
-
- case ESC_w:
- case ESC_W:
- *parsed_pattern++ = PT_WORD << 16;
- break;
- }
- }
+ parsed_pattern = handle_escdsw(escape, parsed_pattern, options,
+ xoptions);
break;
/* Unicode property matching */
@@ -3515,18 +3571,22 @@ while (ptr < ptrend)
class_range_state = RANGE_NO;
- /* When PCRE2_UCP is set, some of the POSIX classes are converted to
- use Unicode properties \p or \P or, in one case, \h or \H. The
- substitutes table has two values per class, containing the type and
- value of a \p or \P item. The special cases are specified with a
- negative type: a non-zero value causes \h or \H to be used, and a zero
- value falls through to behave like a non-UCP POSIX class. */
+ /* When PCRE2_UCP is set, unless PCRE2_EXTRA_ASCII_POSIX is set, some
+ of the POSIX classes are converted to use Unicode properties \p or \P
+ or, in one case, \h or \H. The substitutes table has two values per
+ class, containing the type and value of a \p or \P item. The special
+ cases are specified with a negative type: a non-zero value causes \h or
+ \H to be used, and a zero value falls through to behave like a non-UCP
+ POSIX class. There are now also some extra options that force ASCII for
+ some classes. */
#ifdef SUPPORT_UNICODE
- if ((options & PCRE2_UCP) != 0)
+ if ((options & PCRE2_UCP) != 0 &&
+ (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0)
{
int ptype = posix_substitutes[2*posix_class];
int pvalue = posix_substitutes[2*posix_class + 1];
+
if (ptype >= 0)
{
*parsed_pattern++ = META_ESCAPE + (posix_negate? ESC_P : ESC_p);
@@ -3664,7 +3724,7 @@ while (ptr < ptrend)
*parsed_pattern++ = META_ESCAPE + escape;
break;
- /* These escapes are converted to Unicode property tests when
+ /* These escapes may be converted to Unicode property tests when
PCRE2_UCP is set. */
case ESC_d:
@@ -3673,33 +3733,8 @@ while (ptr < ptrend)
case ESC_S:
case ESC_w:
case ESC_W:
- if ((options & PCRE2_UCP) == 0)
- {
- *parsed_pattern++ = META_ESCAPE + escape;
- }
- else
- {
- *parsed_pattern++ = META_ESCAPE +
- ((escape == ESC_d || escape == ESC_s || escape == ESC_w)?
- ESC_p : ESC_P);
- switch(escape)
- {
- case ESC_d:
- case ESC_D:
- *parsed_pattern++ = (PT_PC << 16) | ucp_Nd;
- break;
-
- case ESC_s:
- case ESC_S:
- *parsed_pattern++ = PT_SPACE << 16;
- break;
-
- case ESC_w:
- case ESC_W:
- *parsed_pattern++ = PT_WORD << 16;
- break;
- }
- }
+ parsed_pattern = handle_escdsw(escape, parsed_pattern, options,
+ xoptions);
break;
/* Explicit Unicode property matching */
@@ -4052,7 +4087,7 @@ while (ptr < ptrend)
{
BOOL hyphenok = TRUE;
uint32_t oldoptions = options;
- uint32_t oldxoptions = xoptions;
+ uint32_t oldxoptions = xoptions;
top_nest->reset_group = 0;
top_nest->max_group = 0;
@@ -4067,7 +4102,7 @@ while (ptr < ptrend)
{
options &= ~(PCRE2_CASELESS|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE|
PCRE2_DOTALL|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE);
- xoptions &= ~(PCRE2_EXTRA_CASELESS_RESTRICT);
+ xoptions &= ~(PCRE2_EXTRA_CASELESS_RESTRICT);
hyphenok = FALSE;
ptr++;
}
@@ -4085,10 +4120,44 @@ while (ptr < ptrend)
goto FAILED;
}
optset = &unset;
- xoptset = &xunset;
+ xoptset = &xunset;
hyphenok = FALSE;
break;
+ /* There are some two-character sequences that start with 'a'. */
+
+ case CHAR_a:
+ if (ptr < ptrend)
+ {
+ if (*ptr == CHAR_D)
+ {
+ *xoptset |= PCRE2_EXTRA_ASCII_BSD;
+ ptr++;
+ break;
+ }
+ if (*ptr == CHAR_P)
+ {
+ *xoptset |= PCRE2_EXTRA_ASCII_POSIX;
+ ptr++;
+ break;
+ }
+ if (*ptr == CHAR_S)
+ {
+ *xoptset |= PCRE2_EXTRA_ASCII_BSS;
+ ptr++;
+ break;
+ }
+ if (*ptr == CHAR_W)
+ {
+ *xoptset |= PCRE2_EXTRA_ASCII_BSW;
+ ptr++;
+ break;
+ }
+ }
+ *xoptset |= PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS|
+ PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX;
+ break;
+
case CHAR_J: /* Record that it changed in the external options */
*optset |= PCRE2_DUPNAMES;
cb->external_flags |= PCRE2_JCHANGED;
@@ -4097,7 +4166,7 @@ while (ptr < ptrend)
case CHAR_i: *optset |= PCRE2_CASELESS; break;
case CHAR_m: *optset |= PCRE2_MULTILINE; break;
case CHAR_n: *optset |= PCRE2_NO_AUTO_CAPTURE; break;
- case CHAR_r: *xoptset|= PCRE2_EXTRA_CASELESS_RESTRICT; break;
+ case CHAR_r: *xoptset|= PCRE2_EXTRA_CASELESS_RESTRICT; break;
case CHAR_s: *optset |= PCRE2_DOTALL; break;
case CHAR_U: *optset |= PCRE2_UNGREEDY; break;
@@ -4757,7 +4826,7 @@ while (ptr < ptrend)
if (top_nest != NULL && top_nest->nest_depth == nest_depth)
{
options = (options & ~PARSE_TRACKED_OPTIONS) | top_nest->options;
- xoptions = (xoptions & ~PARSE_TRACKED_EXTRA_OPTIONS) | top_nest->xoptions;
+ xoptions = (xoptions & ~PARSE_TRACKED_EXTRA_OPTIONS) | top_nest->xoptions;
if ((top_nest->flags & NSF_RESET) != 0 &&
top_nest->max_group > cb->bracount)
cb->bracount = top_nest->max_group;
@@ -5019,7 +5088,7 @@ Arguments:
classbits the bit map for characters < 256
uchardptr points to the pointer for extra data
options the options bits
- xoptions the extra options bits
+ xoptions the extra options bits
cb compile data
start start of range character
end end of range character
@@ -5030,7 +5099,7 @@ Returns: the number of < 256 characters added
static unsigned int
add_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
- uint32_t options, uint32_t xoptions, compile_block *cb, uint32_t start,
+ uint32_t options, uint32_t xoptions, compile_block *cb, uint32_t start,
uint32_t end)
{
uint32_t c;
@@ -5039,7 +5108,7 @@ unsigned int n8 = 0;
/* If caseless matching is required, scan the range and process alternate
cases. In Unicode, there are 8-bit characters that have alternate cases that
-are greater than 255 and vice-versa (though these may be ignored if caseless
+are greater than 255 and vice-versa (though these may be ignored if caseless
restriction is in force). Sometimes we can just extend the original range. */
if ((options & PCRE2_CASELESS) != 0)
@@ -5053,17 +5122,17 @@ if ((options & PCRE2_CASELESS) != 0)
options &= ~PCRE2_CASELESS; /* Remove for recursive calls */
c = start;
- while ((rc = get_othercase_range(&c, end, &oc, &od,
+ while ((rc = get_othercase_range(&c, end, &oc, &od,
(xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)) >= 0)
{
/* Handle a single character that has more than one other case. */
- if (rc > 0) n8 += add_list_to_class_internal(classbits, uchardptr,
+ if (rc > 0) n8 += add_list_to_class_internal(classbits, uchardptr,
options, xoptions, cb, PRIV(ucd_caseless_sets) + rc, oc);
/* Do nothing if the other case range is within the original range. */
- else if (oc >= cb->class_range_start && od <= cb->class_range_end)
+ else if (oc >= cb->class_range_start && od <= cb->class_range_end)
continue;
/* Extend the original range if there is overlap, noting that if oc < c,
@@ -5178,7 +5247,7 @@ Arguments:
classbits the bit map for characters < 256
uchardptr points to the pointer for extra data
options the options bits
- xoptions the extra options bits
+ xoptions the extra options bits
cb contains pointers to tables etc.
p points to row of 32-bit values, terminated by NOTACHAR
except character to omit; this is used when adding lists of
@@ -5191,7 +5260,7 @@ Returns: the number of < 256 characters added
static unsigned int
add_list_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
- uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p,
+ uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p,
unsigned int except)
{
unsigned int n8 = 0;
@@ -5201,7 +5270,7 @@ while (p[0] < NOTACHAR)
if (p[0] != except)
{
while(p[n+1] == p[0] + n + 1) n++;
- n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb,
+ n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb,
p[0], p[n]);
}
p += n + 1;
@@ -5223,7 +5292,7 @@ Arguments:
classbits the bit map for characters < 256
uchardptr points to the pointer for extra data
options the options bits
- xoptions the extra options bits
+ xoptions the extra options bits
cb compile data
start start of range character
end end of range character
@@ -5238,7 +5307,7 @@ add_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options,
{
cb->class_range_start = start;
cb->class_range_end = end;
-return add_to_class_internal(classbits, uchardptr, options, xoptions, cb,
+return add_to_class_internal(classbits, uchardptr, options, xoptions, cb,
start, end);
}
@@ -5257,7 +5326,7 @@ Arguments:
classbits the bit map for characters < 256
uchardptr points to the pointer for extra data
options the options bits
- xoptions the extra options bits
+ xoptions the extra options bits
cb contains pointers to tables etc.
p points to row of 32-bit values, terminated by NOTACHAR
except character to omit; this is used when adding lists of
@@ -5281,7 +5350,7 @@ while (p[0] < NOTACHAR)
while(p[n+1] == p[0] + n + 1) n++;
cb->class_range_start = p[0];
cb->class_range_end = p[n];
- n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb,
+ n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb,
p[0], p[n]);
}
p += n + 1;
@@ -5302,7 +5371,7 @@ Arguments:
classbits the bit map for characters < 256
uchardptr points to the pointer for extra data
options the options bits
- xoptions the extra options bits
+ xoptions the extra options bits
cb contains pointers to tables etc.
p points to row of 32-bit values, terminated by NOTACHAR
@@ -5412,7 +5481,7 @@ real compile phase. The value of lengthptr distinguishes the two phases.
Arguments:
optionsptr pointer to the option bits
- xoptionsptr pointer to the extra option bits
+ xoptionsptr pointer to the extra option bits
codeptr points to the pointer to the current code point
pptrptr points to the current parsed pattern pointer
errorcodeptr points to error code variable
@@ -5431,10 +5500,10 @@ Returns: 0 There's been an error, *errorcodeptr is non-zero
*/
static int
-compile_branch(uint32_t *optionsptr, uint32_t *xoptionsptr,
- PCRE2_UCHAR **codeptr, uint32_t **pptrptr, int *errorcodeptr,
- uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr,
- uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb,
+compile_branch(uint32_t *optionsptr, uint32_t *xoptionsptr,
+ PCRE2_UCHAR **codeptr, uint32_t **pptrptr, int *errorcodeptr,
+ uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr,
+ uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb,
PCRE2_SIZE *lengthptr)
{
int bravalue = 0;
@@ -5757,8 +5826,8 @@ for (;; pptr++)
uint32_t c = pptr[1];
#ifdef SUPPORT_UNICODE
- if (UCD_CASESET(c) == 0 ||
- ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0 &&
+ if (UCD_CASESET(c) == 0 ||
+ ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0 &&
c < 128 && pptr[2] < 128))
#endif
{
@@ -5851,41 +5920,45 @@ for (;; pptr++)
XCL_PROP/XCL_NOTPROP directly, which is done here. */
#ifdef SUPPORT_UNICODE
- if ((options & PCRE2_UCP) != 0) switch(posix_class)
+ if ((options & PCRE2_UCP) != 0 &&
+ (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0)
{
- case PC_GRAPH:
- case PC_PRINT:
- case PC_PUNCT:
- *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
- *class_uchardata++ = (PCRE2_UCHAR)
- ((posix_class == PC_GRAPH)? PT_PXGRAPH :
- (posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT);
- *class_uchardata++ = 0;
- xclass_has_prop = TRUE;
- goto CONTINUE_CLASS;
-
- /* For the other POSIX classes (ascii, xdigit) we are going to
- fall through to the non-UCP case and build a bit map for
- characters with code points less than 256. However, if we are in
- a negated POSIX class, characters with code points greater than
- 255 must either all match or all not match, depending on whether
- the whole class is not or is negated. For example, for
- [[:^ascii:]... they must all match, whereas for [^[:^xdigit:]...
- they must not.
-
- In the special case where there are no xclass items, this is
- automatically handled by the use of OP_CLASS or OP_NCLASS, but an
- explicit range is needed for OP_XCLASS. Setting a flag here
- causes the range to be generated later when it is known that
- OP_XCLASS is required. In the 8-bit library this is relevant only in
- utf mode, since no wide characters can exist otherwise. */
+ switch(posix_class)
+ {
+ case PC_GRAPH:
+ case PC_PRINT:
+ case PC_PUNCT:
+ *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
+ *class_uchardata++ = (PCRE2_UCHAR)
+ ((posix_class == PC_GRAPH)? PT_PXGRAPH :
+ (posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT);
+ *class_uchardata++ = 0;
+ xclass_has_prop = TRUE;
+ goto CONTINUE_CLASS;
+
+ /* For the other POSIX classes (ascii, xdigit) we are going to
+ fall through to the non-UCP case and build a bit map for
+ characters with code points less than 256. However, if we are in
+ a negated POSIX class, characters with code points greater than
+ 255 must either all match or all not match, depending on whether
+ the whole class is not or is negated. For example, for
+ [[:^ascii:]... they must all match, whereas for [^[:^xdigit:]...
+ they must not.
+
+ In the special case where there are no xclass items, this is
+ automatically handled by the use of OP_CLASS or OP_NCLASS, but an
+ explicit range is needed for OP_XCLASS. Setting a flag here
+ causes the range to be generated later when it is known that
+ OP_XCLASS is required. In the 8-bit library this is relevant only in
+ utf mode, since no wide characters can exist otherwise. */
- default:
+ default:
#if PCRE2_CODE_UNIT_WIDTH == 8
- if (utf)
+ if (utf)
#endif
- match_all_or_no_wide_chars |= local_negate;
- break;
+ match_all_or_no_wide_chars |= local_negate;
+ break;
+ }
}
#endif /* SUPPORT_UNICODE */
@@ -6011,7 +6084,7 @@ for (;; pptr++)
case ESC_h:
(void)add_list_to_class(classbits, &class_uchardata,
- options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list),
+ options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list),
NOTACHAR);
break;
@@ -6022,7 +6095,7 @@ for (;; pptr++)
case ESC_v:
(void)add_list_to_class(classbits, &class_uchardata,
- options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list),
+ options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list),
NOTACHAR);
break;
@@ -6102,7 +6175,7 @@ for (;; pptr++)
if (C <= CHAR_i)
{
class_has_8bitchar +=
- add_to_class(classbits, &class_uchardata, options, xoptions,
+ add_to_class(classbits, &class_uchardata, options, xoptions,
cb, C + uc, ((D < CHAR_i)? D : CHAR_i) + uc);
C = CHAR_j;
}
@@ -6110,7 +6183,7 @@ for (;; pptr++)
if (C <= D && C <= CHAR_r)
{
class_has_8bitchar +=
- add_to_class(classbits, &class_uchardata, options, xoptions,
+ add_to_class(classbits, &class_uchardata, options, xoptions,
cb, C + uc, ((D < CHAR_r)? D : CHAR_r) + uc);
C = CHAR_s;
}
@@ -6118,7 +6191,7 @@ for (;; pptr++)
if (C <= D)
{
class_has_8bitchar +=
- add_to_class(classbits, &class_uchardata, options, xoptions,
+ add_to_class(classbits, &class_uchardata, options, xoptions,
cb, C + uc, D + uc);
}
}
@@ -6126,7 +6199,7 @@ for (;; pptr++)
#endif
/* Not an EBCDIC special range */
- class_has_8bitchar += add_to_class(classbits, &class_uchardata,
+ class_has_8bitchar += add_to_class(classbits, &class_uchardata,
options, xoptions, cb, c, d);
goto CONTINUE_CLASS; /* Go get the next char in the class */
} /* End of range handling */
@@ -6135,7 +6208,7 @@ for (;; pptr++)
/* Handle a single character. */
class_has_8bitchar +=
- add_to_class(classbits, &class_uchardata, options, xoptions, cb,
+ add_to_class(classbits, &class_uchardata, options, xoptions, cb,
meta, meta);
}
@@ -6621,7 +6694,7 @@ for (;; pptr++)
if ((group_return =
compile_regex(
options, /* The options state */
- xoptions, /* The extra options state */
+ xoptions, /* The extra options state */
&tempcode, /* Where to put code (updated) */
&pptr, /* Input pointer (updated) */
errorcodeptr, /* Where to put an error message */
@@ -8020,7 +8093,7 @@ for (;; pptr++)
{
uint32_t caseset = UCD_CASESET(meta);
if (caseset != 0 &&
- ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) == 0 ||
+ ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) == 0 ||
PRIV(ucd_caseless_sets)[caseset] > 127))
{
*code++ = OP_PROP;
@@ -8137,7 +8210,7 @@ the two phases.
Arguments:
options option bits, including any changes for this subpattern
- xoptions extra option bits, ditto
+ xoptions extra option bits, ditto
codeptr -> the address of the current code pointer
pptrptr -> the address of the current parsed pattern pointer
errorcodeptr -> pointer to error code variable
@@ -8157,10 +8230,10 @@ Returns: 0 There has been an error
*/
static int
-compile_regex(uint32_t options, uint32_t xoptions, PCRE2_UCHAR **codeptr,
- uint32_t **pptrptr, int *errorcodeptr, uint32_t skipunits,
- uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr,
- uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb,
+compile_regex(uint32_t options, uint32_t xoptions, PCRE2_UCHAR **codeptr,
+ uint32_t **pptrptr, int *errorcodeptr, uint32_t skipunits,
+ uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr,
+ uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb,
PCRE2_SIZE *lengthptr)
{
PCRE2_UCHAR *code = *codeptr;
@@ -8257,7 +8330,7 @@ for (;;)
into the length. */
if ((branch_return =
- compile_branch(&options, &xoptions, &code, &pptr, errorcodeptr,
+ compile_branch(&options, &xoptions, &code, &pptr, errorcodeptr,
&branchfirstcu, &branchfirstcuflags, &branchreqcu, &branchreqcuflags,
&bc, cb, (lengthptr == NULL)? NULL : &length)) == 0)
return 0;
@@ -10292,7 +10365,7 @@ code = cworkspace;
*code = OP_BRA;
(void)compile_regex(cb.external_options, ccontext->extra_options, &code, &pptr,
- &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, &cb,
+ &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, &cb,
&length);
if (errorcode != 0) goto HAD_CB_ERROR; /* Offset is in cb.erroroffset */
@@ -10390,8 +10463,8 @@ of the function here. */
pptr = cb.parsed_pattern;
code = (PCRE2_UCHAR *)codestart;
*code = OP_BRA;
-regexrc = compile_regex(re->overall_options, ccontext->extra_options, &code,
- &pptr, &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL,
+regexrc = compile_regex(re->overall_options, ccontext->extra_options, &code,
+ &pptr, &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL,
&cb, NULL);
if (regexrc < 0) re->flags |= PCRE2_MATCH_EMPTY;
re->top_bracket = cb.bracount;
diff --git a/src/pcre2test.c b/src/pcre2test.c
index 169c6181..6bae5bb5 100644
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@@ -628,6 +628,9 @@ typedef struct modstruct {
PCRE2_SIZE offset;
} modstruct;
+#define PCRE2_EXTRA_ASCII_ALL (PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS| \
+ PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX)
+
static modstruct modlist[] = {
{ "aftertext", MOD_PNDP, MOD_CTL, CTL_AFTERTEXT, PO(control) },
{ "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) },
@@ -642,6 +645,11 @@ static modstruct modlist[] = {
{ "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) },
{ "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) },
{ "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
+ { "ascii_all", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_ALL, CO(extra_options) },
+ { "ascii_bsd", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSD, CO(extra_options) },
+ { "ascii_bss", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSS, CO(extra_options) },
+ { "ascii_bsw", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSW, CO(extra_options) },
+ { "ascii_posix", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_POSIX, CO(extra_options) },
{ "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) },
{ "bad_escape_is_literal", MOD_CTC, MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) },
{ "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) },
@@ -839,6 +847,7 @@ typedef struct c1modstruct {
static c1modstruct c1modlist[] = {
{ "bincode", 'B', -1 },
{ "info", 'I', -1 },
+ { "ascii_all", 'a', -1 },
{ "global", 'g', -1 },
{ "caseless", 'i', -1 },
{ "multiline", 'm', -1 },
@@ -4283,15 +4292,19 @@ show_compile_extra_options(uint32_t options, const char *before,
const char *after)
{
if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
-else fprintf(outfile, "%s%s%s%s%s%s%s%s%s",
+else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s",
before,
((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
+ ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " alt_bsux" : "",
+ ((options & PCRE2_EXTRA_ASCII_BSD) != 0)? " ascii_bsd" : "",
+ ((options & PCRE2_EXTRA_ASCII_BSS) != 0)? " ascii_bss" : "",
+ ((options & PCRE2_EXTRA_ASCII_BSW) != 0)? " ascii_bsw" : "",
+ ((options & PCRE2_EXTRA_ASCII_POSIX) != 0)? " ascii_posix" : "",
((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "",
- ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " extra_alt_bsux" : "",
+ ((options & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)? " caseless_restrict" : "",
+ ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "",
((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "",
((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
- ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "",
- ((options & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)? " caseless_restrict" : "",
after);
}
diff --git a/testdata/testinput5 b/testdata/testinput5
index b8174230..6e186cf0 100644
--- a/testdata/testinput5
+++ b/testdata/testinput5
@@ -2309,4 +2309,137 @@
# End caseless restrict tests
+# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without.
+
+# DIGITS
+
+/\d+/i,utf
+ 123\x{660}456
+
+/\d+/i,utf,ucp
+ 123\x{660}456
+
+/\d+/i,utf,ucp,ascii_bsd
+ 123\x{660}456
+
+/[\d]+/i,utf
+ 123\x{660}456
+
+/[\d]+/i,utf,ucp
+ 123\x{660}456
+
+/[\d]+/i,utf,ucp,ascii_bsd
+ 123\x{660}456
+
+/\d(?aD)\d(?-aD)\d/utf,ucp
+ \x{660}9\x{660}
+\= Expect no match
+ \x{660}\x{660}\x{660}
+
+/\d(?a)\d(?-a)\d/utf,ucp
+ \x{660}9\x{660}
+\= Expect no match
+ \x{660}\x{660}\x{660}
+
+# SPACES
+
+/>\s+</i,utf
+ > <
+\= Expect no match
+ >\x{a0} <
+
+/>\s+</i,utf,ucp
+ > <
+ >\x{a0} <
+
+/>\s+</i,utf,ucp,ascii_bss
+ > <
+\= Expect no match
+ >\x{a0} <
+
+/>[\s]+</i,utf
+ > <
+\= Expect no match
+ >\x{a0} <
+
+/>[\s]+</i,utf,ucp
+ > <
+ >\x{a0} <
+
+/>[\s]+</i,utf,ucp,ascii_bss
+ > <
+\= Expect no match
+ >\x{a0} <
+
+/>\s(?aS)\s(?-aS)\s</utf,ucp
+ >\x{a0} \x{a0}<
+\= Expect no match
+ >\x{a0}\x{a0}\x{a0}<
+
+/>\s(?a)\s(?-a)\s</utf,ucp
+ >\x{a0} \x{a0}<
+\= Expect no match
+ >\x{a0}\x{a0}\x{a0}<
+
+# WORDS
+
+/\w+/i,utf
+ 123\x{660}abc
+
+/\w+/i,utf,ucp
+ 123\x{660}abc
+
+/\w+/i,utf,ucp,ascii_bsw
+ 123\x{660}abc
+
+/[\w]+/i,utf
+ 123\x{660}abc
+
+/[\w]+/i,utf,ucp
+ 123\x{660}abc
+
+/[\w]+/i,utf,ucp,ascii_bsw
+ 123\x{660}abc
+
+/\w(?aW)\w(?-aW)\w/utf,ucp
+ \x{660}A\x{c0}
+\= Expect no match
+ \x{660}\x{c0}\x{c0}
+
+/\w(?a)\w(?-a)\w/utf,ucp
+ \x{660}A\x{c0}
+\= Expect no match
+ \x{660}\x{c0}\x{c0}
+
+# POSIX
+
+/[[:digit:]]+/utf,ucp
+ 123\x{660}456
+
+/[[:digit:]]+/utf,ucp,ascii_posix
+ 123\x{660}456
+
+/>[[:space:]]+</utf,ucp
+ >\x{a0} \x{a0}<
+ >\x{a0}\x{a0}\x{a0}<
+
+/>[[:space:]]+</utf,ucp,ascii_posix
+\= Expect no match
+ >\x{a0} \x{a0}<
+
+/(?aP)[[:alnum:]]+/i,ucp,utf
+ abcáxyz
+ abc\x{660}xyz
+
+/(?aP)[[:alnum:]\d]+/i,ucp,utf
+ abc\x{660}xyz
+
+# VARIOUS
+
+/[\d\s\w]+/a,ucp,utf
+ 9 A\x{660}À
+ 9 AÀ\x{660}
+
+# End PCRE2_EXTRA_ASCII_xxx tests
+
# End of testinput5
diff --git a/testdata/testinput7 b/testdata/testinput7
index 991de885..64a37ad2 100644
--- a/testdata/testinput7
+++ b/testdata/testinput7
@@ -2328,4 +2328,137 @@
# End caseless restrict tests
+# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without.
+
+# DIGITS
+
+/\d+/i,utf
+ 123\x{660}456
+
+/\d+/i,utf,ucp
+ 123\x{660}456
+
+/\d+/i,utf,ucp,ascii_bsd
+ 123\x{660}456
+
+/[\d]+/i,utf
+ 123\x{660}456
+
+/[\d]+/i,utf,ucp
+ 123\x{660}456
+
+/[\d]+/i,utf,ucp,ascii_bsd
+ 123\x{660}456
+
+/\d(?aD)\d(?-aD)\d/utf,ucp
+ \x{660}9\x{660}
+\= Expect no match
+ \x{660}\x{660}\x{660}
+
+/\d(?a)\d(?-a)\d/utf,ucp
+ \x{660}9\x{660}
+\= Expect no match
+ \x{660}\x{660}\x{660}
+
+# SPACES
+
+/>\s+</i,utf
+ > <
+\= Expect no match
+ >\x{a0} <
+
+/>\s+</i,utf,ucp
+ > <
+ >\x{a0} <
+
+/>\s+</i,utf,ucp,ascii_bss
+ > <
+\= Expect no match
+ >\x{a0} <
+
+/>[\s]+</i,utf
+ > <
+\= Expect no match
+ >\x{a0} <
+
+/>[\s]+</i,utf,ucp
+ > <
+ >\x{a0} <
+
+/>[\s]+</i,utf,ucp,ascii_bss
+ > <
+\= Expect no match
+ >\x{a0} <
+
+/>\s(?aS)\s(?-aS)\s</utf,ucp
+ >\x{a0} \x{a0}<
+\= Expect no match
+ >\x{a0}\x{a0}\x{a0}<
+
+/>\s(?a)\s(?-a)\s</utf,ucp
+ >\x{a0} \x{a0}<
+\= Expect no match
+ >\x{a0}\x{a0}\x{a0}<
+
+# WORDS
+
+/\w+/i,utf
+ 123\x{660}abc
+
+/\w+/i,utf,ucp
+ 123\x{660}abc
+
+/\w+/i,utf,ucp,ascii_bsw
+ 123\x{660}abc
+
+/[\w]+/i,utf
+ 123\x{660}abc
+
+/[\w]+/i,utf,ucp
+ 123\x{660}abc
+
+/[\w]+/i,utf,ucp,ascii_bsw
+ 123\x{660}abc
+
+/\w(?aW)\w(?-aW)\w/utf,ucp
+ \x{660}A\x{c0}
+\= Expect no match
+ \x{660}\x{c0}\x{c0}
+
+/\w(?a)\w(?-a)\w/utf,ucp
+ \x{660}A\x{c0}
+\= Expect no match
+ \x{660}\x{c0}\x{c0}
+
+# POSIX
+
+/[[:digit:]]+/utf,ucp
+ 123\x{660}456
+
+/[[:digit:]]+/utf,ucp,ascii_posix
+ 123\x{660}456
+
+/>[[:space:]]+</utf,ucp
+ >\x{a0} \x{a0}<
+ >\x{a0}\x{a0}\x{a0}<
+
+/>[[:space:]]+</utf,ucp,ascii_posix
+\= Expect no match
+ >\x{a0} \x{a0}<
+
+/(?aP)[[:alnum:]]+/i,ucp,utf
+ abcáxyz
+ abc\x{660}xyz
+
+/(?aP)[[:alnum:]\d]+/i,ucp,utf
+ abc\x{660}xyz
+
+# VARIOUS
+
+/[\d\s\w]+/a,ucp,utf
+ 9 A\x{660}À
+ 9 AÀ\x{660}
+
+# End PCRE2_EXTRA_ASCII_xxx tests
+
# End of testinput7
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index db42a117..26972f70 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -5196,4 +5196,183 @@ No match
# End caseless restrict tests
+# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without.
+
+# DIGITS
+
+/\d+/i,utf
+ 123\x{660}456
+ 0: 123
+
+/\d+/i,utf,ucp
+ 123\x{660}456
+ 0: 123\x{660}456
+
+/\d+/i,utf,ucp,ascii_bsd
+ 123\x{660}456
+ 0: 123
+
+/[\d]+/i,utf
+ 123\x{660}456
+ 0: 123
+
+/[\d]+/i,utf,ucp
+ 123\x{660}456
+ 0: 123\x{660}456
+
+/[\d]+/i,utf,ucp,ascii_bsd
+ 123\x{660}456
+ 0: 123
+
+/\d(?aD)\d(?-aD)\d/utf,ucp
+ \x{660}9\x{660}
+ 0: \x{660}9\x{660}
+\= Expect no match
+ \x{660}\x{660}\x{660}
+No match
+
+/\d(?a)\d(?-a)\d/utf,ucp
+ \x{660}9\x{660}
+ 0: \x{660}9\x{660}
+\= Expect no match
+ \x{660}\x{660}\x{660}
+No match
+
+# SPACES
+
+/>\s+</i,utf
+ > <
+ 0: > <
+\= Expect no match
+ >\x{a0} <
+No match
+
+/>\s+</i,utf,ucp
+ > <
+ 0: > <
+ >\x{a0} <
+ 0: >\x{a0} <
+
+/>\s+</i,utf,ucp,ascii_bss
+ > <
+ 0: > <
+\= Expect no match
+ >\x{a0} <
+No match
+
+/>[\s]+</i,utf
+ > <
+ 0: > <
+\= Expect no match
+ >\x{a0} <
+No match
+
+/>[\s]+</i,utf,ucp
+ > <
+ 0: > <
+ >\x{a0} <
+ 0: >\x{a0} <
+
+/>[\s]+</i,utf,ucp,ascii_bss
+ > <
+ 0: > <
+\= Expect no match
+ >\x{a0} <
+No match
+
+/>\s(?aS)\s(?-aS)\s</utf,ucp
+ >\x{a0} \x{a0}<
+ 0: >\x{a0} \x{a0}<
+\= Expect no match
+ >\x{a0}\x{a0}\x{a0}<
+No match
+
+/>\s(?a)\s(?-a)\s</utf,ucp
+ >\x{a0} \x{a0}<
+ 0: >\x{a0} \x{a0}<
+\= Expect no match
+ >\x{a0}\x{a0}\x{a0}<
+No match
+
+# WORDS
+
+/\w+/i,utf
+ 123\x{660}abc
+ 0: 123
+
+/\w+/i,utf,ucp
+ 123\x{660}abc
+ 0: 123\x{660}abc
+
+/\w+/i,utf,ucp,ascii_bsw
+ 123\x{660}abc
+ 0: 123
+
+/[\w]+/i,utf
+ 123\x{660}abc
+ 0: 123
+
+/[\w]+/i,utf,ucp
+ 123\x{660}abc
+ 0: 123\x{660}abc
+
+/[\w]+/i,utf,ucp,ascii_bsw
+ 123\x{660}abc
+ 0: 123
+
+/\w(?aW)\w(?-aW)\w/utf,ucp
+ \x{660}A\x{c0}
+ 0: \x{660}A\x{c0}
+\= Expect no match
+ \x{660}\x{c0}\x{c0}
+No match
+
+/\w(?a)\w(?-a)\w/utf,ucp
+ \x{660}A\x{c0}
+ 0: \x{660}A\x{c0}
+\= Expect no match
+ \x{660}\x{c0}\x{c0}
+No match
+
+# POSIX
+
+/[[:digit:]]+/utf,ucp
+ 123\x{660}456
+ 0: 123\x{660}456
+
+/[[:digit:]]+/utf,ucp,ascii_posix
+ 123\x{660}456
+ 0: 123
+
+/>[[:space:]]+</utf,ucp
+ >\x{a0} \x{a0}<
+ 0: >\x{a0} \x{a0}<
+ >\x{a0}\x{a0}\x{a0}<
+ 0: >\x{a0}\x{a0}\x{a0}<
+
+/>[[:space:]]+</utf,ucp,ascii_posix
+\= Expect no match
+ >\x{a0} \x{a0}<
+No match
+
+/(?aP)[[:alnum:]]+/i,ucp,utf
+ abcáxyz
+ 0: abc
+ abc\x{660}xyz
+ 0: abc
+
+/(?aP)[[:alnum:]\d]+/i,ucp,utf
+ abc\x{660}xyz
+ 0: abc\x{660}xyz
+
+# VARIOUS
+
+/[\d\s\w]+/a,ucp,utf
+ 9 A\x{660}À
+ 0: 9 A
+ 9 AÀ\x{660}
+ 0: 9 A
+
+# End PCRE2_EXTRA_ASCII_xxx tests
+
# End of testinput5
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index c2291a10..c830748c 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -3936,4 +3936,183 @@ No match
# End caseless restrict tests
+# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without.
+
+# DIGITS
+
+/\d+/i,utf
+ 123\x{660}456
+ 0: 123
+
+/\d+/i,utf,ucp
+ 123\x{660}456
+ 0: 123\x{660}456
+
+/\d+/i,utf,ucp,ascii_bsd
+ 123\x{660}456
+ 0: 123
+
+/[\d]+/i,utf
+ 123\x{660}456
+ 0: 123
+
+/[\d]+/i,utf,ucp
+ 123\x{660}456
+ 0: 123\x{660}456
+
+/[\d]+/i,utf,ucp,ascii_bsd
+ 123\x{660}456
+ 0: 123
+
+/\d(?aD)\d(?-aD)\d/utf,ucp
+ \x{660}9\x{660}
+ 0: \x{660}9\x{660}
+\= Expect no match
+ \x{660}\x{660}\x{660}
+No match
+
+/\d(?a)\d(?-a)\d/utf,ucp
+ \x{660}9\x{660}
+ 0: \x{660}9\x{660}
+\= Expect no match
+ \x{660}\x{660}\x{660}
+No match
+
+# SPACES
+
+/>\s+</i,utf
+ > <
+ 0: > <
+\= Expect no match
+ >\x{a0} <
+No match
+
+/>\s+</i,utf,ucp
+ > <
+ 0: > <
+ >\x{a0} <
+ 0: >\x{a0} <
+
+/>\s+</i,utf,ucp,ascii_bss
+ > <
+ 0: > <
+\= Expect no match
+ >\x{a0} <
+No match
+
+/>[\s]+</i,utf
+ > <
+ 0: > <
+\= Expect no match
+ >\x{a0} <
+No match
+
+/>[\s]+</i,utf,ucp
+ > <
+ 0: > <
+ >\x{a0} <
+ 0: >\x{a0} <
+
+/>[\s]+</i,utf,ucp,ascii_bss
+ > <
+ 0: > <
+\= Expect no match
+ >\x{a0} <
+No match
+
+/>\s(?aS)\s(?-aS)\s</utf,ucp
+ >\x{a0} \x{a0}<
+ 0: >\x{a0} \x{a0}<
+\= Expect no match
+ >\x{a0}\x{a0}\x{a0}<
+No match
+
+/>\s(?a)\s(?-a)\s</utf,ucp
+ >\x{a0} \x{a0}<
+ 0: >\x{a0} \x{a0}<
+\= Expect no match
+ >\x{a0}\x{a0}\x{a0}<
+No match
+
+# WORDS
+
+/\w+/i,utf
+ 123\x{660}abc
+ 0: 123
+
+/\w+/i,utf,ucp
+ 123\x{660}abc
+ 0: 123\x{660}abc
+
+/\w+/i,utf,ucp,ascii_bsw
+ 123\x{660}abc
+ 0: 123
+
+/[\w]+/i,utf
+ 123\x{660}abc
+ 0: 123
+
+/[\w]+/i,utf,ucp
+ 123\x{660}abc
+ 0: 123\x{660}abc
+
+/[\w]+/i,utf,ucp,ascii_bsw
+ 123\x{660}abc
+ 0: 123
+
+/\w(?aW)\w(?-aW)\w/utf,ucp
+ \x{660}A\x{c0}
+ 0: \x{660}A\x{c0}
+\= Expect no match
+ \x{660}\x{c0}\x{c0}
+No match
+
+/\w(?a)\w(?-a)\w/utf,ucp
+ \x{660}A\x{c0}
+ 0: \x{660}A\x{c0}
+\= Expect no match
+ \x{660}\x{c0}\x{c0}
+No match
+
+# POSIX
+
+/[[:digit:]]+/utf,ucp
+ 123\x{660}456
+ 0: 123\x{660}456
+
+/[[:digit:]]+/utf,ucp,ascii_posix
+ 123\x{660}456
+ 0: 123
+
+/>[[:space:]]+</utf,ucp
+ >\x{a0} \x{a0}<
+ 0: >\x{a0} \x{a0}<
+ >\x{a0}\x{a0}\x{a0}<
+ 0: >\x{a0}\x{a0}\x{a0}<
+
+/>[[:space:]]+</utf,ucp,ascii_posix
+\= Expect no match
+ >\x{a0} \x{a0}<
+No match
+
+/(?aP)[[:alnum:]]+/i,ucp,utf
+ abcáxyz
+ 0: abc
+ abc\x{660}xyz
+ 0: abc
+
+/(?aP)[[:alnum:]\d]+/i,ucp,utf
+ abc\x{660}xyz
+ 0: abc\x{660}xyz
+
+# VARIOUS
+
+/[\d\s\w]+/a,ucp,utf
+ 9 A\x{660}À
+ 0: 9 A
+ 9 AÀ\x{660}
+ 0: 9 A
+
+# End PCRE2_EXTRA_ASCII_xxx tests
+
# End of testinput7
--
2.23.0
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。