diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index 25c2b108ea416ad9d4f9202223a4c79a2ec9f959..5acdd45f59fb76a2c0db7e9e63d0059d215cf2a7 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -137,6 +137,7 @@ set(ECMASCRIPT_SOURCES ${ECMA_SRC_DIR}/js_primitive_ref.cpp ${ECMA_SRC_DIR}/js_promise.cpp ${ECMA_SRC_DIR}/js_proxy.cpp + ${ECMA_SRC_DIR}/js_regexp_iterator.cpp ${ECMA_SRC_DIR}/js_serializer.cpp ${ECMA_SRC_DIR}/js_set.cpp ${ECMA_SRC_DIR}/js_set_iterator.cpp @@ -150,6 +151,7 @@ set(ECMASCRIPT_SOURCES ${ECMA_SRC_DIR}/linked_hash_table.cpp ${ECMA_SRC_DIR}/literal_data_extractor.cpp ${ECMA_SRC_DIR}/message_string.cpp + ${ECMA_SRC_DIR}/mem/dyn_chunk.cpp ${ECMA_SRC_DIR}/mem/ecma_reference_processor.cpp ${ECMA_SRC_DIR}/mem/ecma_string.cpp ${ECMA_SRC_DIR}/mem/mem_manager.cpp @@ -158,7 +160,6 @@ set(ECMASCRIPT_SOURCES ${ECMA_SRC_DIR}/object_factory.cpp ${ECMA_SRC_DIR}/object_operator.cpp ${ECMA_SRC_DIR}/layout_info.cpp - ${ECMA_SRC_DIR}/regexp/dyn_chunk.cpp ${ECMA_SRC_DIR}/regexp/regexp_executor.cpp ${ECMA_SRC_DIR}/regexp/regexp_opcode.cpp ${ECMA_SRC_DIR}/regexp/regexp_parser.cpp diff --git a/runtime/base/string_helper.h b/runtime/base/string_helper.h index 1b2d06385363207acf14b6cb3779c11a5e821a1d..b2c2920b934182dbb8fe71bea0ffd751ac80415d 100644 --- a/runtime/base/string_helper.h +++ b/runtime/base/string_helper.h @@ -32,12 +32,27 @@ #include "libpandafile/file_items.h" namespace panda::ecmascript::base { + +// NOLINTNEXTLINE(modernize-avoid-c-arrays) +static constexpr int UICODE_FROM_UTF8[] = { + 0x80, 0xc0, 0xdf, 0xe0, 0xef, 0xf0, 0xf7, 0xf8, 0xfb, 0xfc, 0xfd, +}; +// NOLINTNEXTLINE(modernize-avoid-c-arrays) +static constexpr int UTF8_MIN_CODE[] = { + 0x80, 0x800, 0x10000, 0x00200000, 0x04000000, +}; +// NOLINTNEXTLINE(modernize-avoid-c-arrays) +static constexpr char UTF8_FIRST_CODE[] = { + 0x1f, 0xf, 0x7, 0x3, 0x1, +}; + enum PadPlacement { START, END }; enum TrimKind { TRIM_START, TRIM_END, TRIM_START_END }; class StringHelper { public: + static constexpr int INVALID_UNICODE_FROM_UTF8 = -1; static std::string ToStdString(EcmaString *string); static bool CheckDuplicate(EcmaString *string); @@ -168,6 +183,57 @@ public: static EcmaString *Repeat(JSThread *thread, const std::u16string &thisStr, int32_t repeatLen, bool canBeCompress); + static int UnicodeFromUtf8(const uint8_t *p, int maxLen, const uint8_t **pp) + { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + int c = *p++; + if (c < UICODE_FROM_UTF8[0]) { + *pp = p; + return c; + } + int l = 0; + if (c >= UICODE_FROM_UTF8[1U] && c <= UICODE_FROM_UTF8[2U]) { // 1 - 2: 0000 0080 - 0000 07FF + l = 1; // 1: 0000 0080 - 0000 07FF Unicode + } else if (c >= UICODE_FROM_UTF8[3U] && c <= UICODE_FROM_UTF8[4U]) { // 3 - 4: 0000 0800 - 0000 FFFF + l = 2; // 2: 0000 0800 - 0000 FFFF Unicode + } else if (c >= UICODE_FROM_UTF8[5U] && c <= UICODE_FROM_UTF8[6U]) { // 5 - 6: 0001 0000 - 0010 FFFF + l = 3; // 3: 0001 0000 - 0010 FFFF Unicode + } else if (c >= UICODE_FROM_UTF8[7U] && c <= UICODE_FROM_UTF8[8U]) { // 7 - 8: 0020 0000 - 03FF FFFF + l = 4; // 4: 0020 0000 - 03FF FFFF Unicode + // NOLINTNEXTLINE(readability-magic-numbers) + } else if (c == UICODE_FROM_UTF8[9U] || c == UICODE_FROM_UTF8[10U]) { // 9 - 10: 0400 0000 - 7FFF FFFF + l = 5; // 5: 0400 0000 - 7FFF FFFF Unicode + } else { + return INVALID_UNICODE_FROM_UTF8; + } + /* check that we have enough characters */ + if (l > (maxLen - 1)) { + return INVALID_UNICODE_FROM_UTF8; + } + return FromUtf8(c, l, p, pp); + } + + static int FromUtf8(int c, int l, const uint8_t *p, const uint8_t **pp) + { + uint32_t b; + // NOLINTNEXTLINE(hicpp-signed-bitwise) + c &= UTF8_FIRST_CODE[l - 1]; + for (int i = 0; i < l; i++) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + b = *p++; + if (b < utf_helper::UTF8_2B_SECOND || b >= utf_helper::UTF8_2B_FIRST) { + return INVALID_UNICODE_FROM_UTF8; + } + // NOLINTNEXTLINE(hicpp-signed-bitwise) + c = (c << 6) | (b & utf_helper::UTF8_2B_THIRD); // 6: Maximum Unicode range + } + if (c < UTF8_MIN_CODE[l - 1]) { + return INVALID_UNICODE_FROM_UTF8; + } + *pp = p; + return c; + } + static EcmaString *Trim(JSThread *thread, const std::u16string &thisStr, TrimKind kind); static panda::ecmascript::JSTaggedValue StringPad(JSThread *thread, JSHandle obj, diff --git a/runtime/base/utf_helper.h b/runtime/base/utf_helper.h index 48728f726ed91219cb9afe86688b682312ad5563..71271254aae1006fea234939a28687cb146b9e1a 100644 --- a/runtime/base/utf_helper.h +++ b/runtime/base/utf_helper.h @@ -40,6 +40,7 @@ static constexpr uint8_t UTF8_1B_MAX = 0x7f; static constexpr uint16_t UTF8_2B_MAX = 0x7ff; static constexpr uint8_t UTF8_2B_FIRST = 0xc0; static constexpr uint8_t UTF8_2B_SECOND = 0x80; +static constexpr uint8_t UTF8_2B_THIRD = 0x3f; static constexpr uint8_t UTF8_3B_FIRST = 0xe0; static constexpr uint8_t UTF8_3B_SECOND = 0x80; diff --git a/runtime/builtins.cpp b/runtime/builtins.cpp index bda67a8556adfb38b90d10fc50848080a85883f1..fafd1278a5c1042ec9fb30525b097e194aa44f14 100644 --- a/runtime/builtins.cpp +++ b/runtime/builtins.cpp @@ -84,6 +84,7 @@ #include "plugins/ecmascript/runtime/js_primitive_ref.h" #include "plugins/ecmascript/runtime/js_promise.h" #include "plugins/ecmascript/runtime/js_regexp.h" +#include "plugins/ecmascript/runtime/js_regexp_iterator.h" #include "plugins/ecmascript/runtime/js_relative_time_format.h" #include "plugins/ecmascript/runtime/js_runtime_options.h" #include "plugins/ecmascript/runtime/js_set.h" @@ -573,6 +574,8 @@ void Builtins::InitializeSymbol(const JSHandle &env, const JSHandle matchSymbol(factory_->NewPublicSymbolWithChar("Symbol.match")); SetNoneAttributeProperty(symbolFunction, "match", matchSymbol); + JSHandle matchAllSymbol(factory_->NewPublicSymbolWithChar("Symbol.matchAll")); + SetNoneAttributeProperty(symbolFunction, "matchAll", matchAllSymbol); JSHandle replaceSymbol(factory_->NewPublicSymbolWithChar("Symbol.replace")); SetNoneAttributeProperty(symbolFunction, "replace", replaceSymbol); JSHandle searchSymbol(factory_->NewPublicSymbolWithChar("Symbol.search")); @@ -608,6 +611,7 @@ void Builtins::InitializeSymbol(const JSHandle &env, const JSHandleSetIteratorSymbol(thread_, iteratorSymbol); env->SetAsyncIteratorSymbol(thread_, asyncIteratorSymbol); env->SetMatchSymbol(thread_, matchSymbol); + env->SetMatchAllSymbol(thread_, matchAllSymbol); env->SetReplaceSymbol(thread_, replaceSymbol); env->SetSearchSymbol(thread_, searchSymbol); env->SetSpeciesSymbol(thread_, speciesSymbol); @@ -663,6 +667,7 @@ void Builtins::InitializeSymbolWithRealm(const JSHandle &realm, SetNoneAttributeProperty(symbolFunction, "iterator", env->GetIteratorSymbol()); SetNoneAttributeProperty(symbolFunction, "asyncIterator", env->GetAsyncIteratorSymbol()); SetNoneAttributeProperty(symbolFunction, "match", env->GetMatchSymbol()); + SetNoneAttributeProperty(symbolFunction, "matchAll", env->GetMatchAllSymbol()); SetNoneAttributeProperty(symbolFunction, "replace", env->GetReplaceSymbol()); SetNoneAttributeProperty(symbolFunction, "search", env->GetSearchSymbol()); SetNoneAttributeProperty(symbolFunction, "species", env->GetSpeciesSymbol()); @@ -693,6 +698,7 @@ void Builtins::InitializeSymbolWithRealm(const JSHandle &realm, realm->SetIteratorSymbol(thread_, env->GetIteratorSymbol()); realm->SetAsyncIteratorSymbol(thread_, env->GetAsyncIteratorSymbol()); realm->SetMatchSymbol(thread_, env->GetMatchSymbol()); + realm->SetMatchAllSymbol(thread_, env->GetMatchAllSymbol()); realm->SetReplaceSymbol(thread_, env->GetReplaceSymbol()); realm->SetSearchSymbol(thread_, env->GetSearchSymbol()); realm->SetSpeciesSymbol(thread_, env->GetSpeciesSymbol()); @@ -1465,6 +1471,7 @@ void Builtins::InitializeString(const JSHandle &env, const JSHandle &env, const JSHandle InitializeMapIterator(env, iteratorFuncDynclass); InitializeArrayIterator(env, iteratorFuncDynclass); InitializeStringIterator(env, iteratorFuncDynclass); + InitializeRegexpIterator(env, iteratorFuncDynclass); } void Builtins::InitializeAsyncIterator(const JSHandle &env, const JSHandle &objFuncDynclass) const @@ -1616,6 +1624,7 @@ void Builtins::InitializeMapIterator(const JSHandle &env, SetStringTagSymbol(env, mapIteratorPrototype, "Map Iterator"); env->SetMapIteratorPrototype(thread_, mapIteratorPrototype); } + void Builtins::InitializeArrayIterator(const JSHandle &env, const JSHandle &iteratorFuncDynclass) const { @@ -1627,6 +1636,17 @@ void Builtins::InitializeArrayIterator(const JSHandle &env, env->SetArrayIteratorPrototype(thread_, arrayIteratorPrototype); } +void Builtins::InitializeRegexpIterator(const JSHandle &env, + const JSHandle &iteratorFuncClass) const +{ + // RegExpIterator.prototype + JSHandle regExpIteratorPrototype(factory_->NewJSObject(iteratorFuncClass)); + // Iterator.prototype.next() + SetFunction(env, regExpIteratorPrototype, "next", JSRegExpIterator::Next, FunctionLength::ZERO); + SetStringTagSymbol(env, regExpIteratorPrototype, "RegExp String Iterator"); + env->SetRegExpIteratorPrototype(thread_, regExpIteratorPrototype); +} + void Builtins::InitializeRegExp(const JSHandle &env) { [[maybe_unused]] EcmaHandleScope scope(thread_); @@ -1697,6 +1717,9 @@ void Builtins::InitializeRegExp(const JSHandle &env) FunctionLength::ONE); // Set RegExp.prototype[@@match] SetFunctionAtSymbol(env, regPrototype, env->GetMatchSymbol(), "[Symbol.match]", RegExp::Match, FunctionLength::ONE); + // Set RegExp.prototype[@@matchAll] + SetFunctionAtSymbol(env, regPrototype, env->GetMatchAllSymbol(), "[Symbol.matchAll]", RegExp::MatchAll, + FunctionLength::ONE); // Set RegExp.prototype[@@replace] SetFunctionAtSymbol(env, regPrototype, env->GetReplaceSymbol(), "[Symbol.replace]", RegExp::Replace, FunctionLength::TWO); diff --git a/runtime/builtins.h b/runtime/builtins.h index 399ada65af8efdfcb9da7abfc37dbb37948be109..c9cd2f15fc52c7c4dc3ffc559630921a94e07953 100644 --- a/runtime/builtins.h +++ b/runtime/builtins.h @@ -154,6 +154,8 @@ private: void InitializeArrayIterator(const JSHandle &env, const JSHandle &iteratorFuncDynclass) const; + void InitializeRegexpIterator(const JSHandle &env, const JSHandle &iteratorFuncClass) const; + void InitializeArrayBuffer(const JSHandle &env, const JSHandle &objFuncDynclass) const; void InitializeDataView(const JSHandle &env, const JSHandle &objFuncDynclass) const; diff --git a/runtime/builtins/builtins_regexp.cpp b/runtime/builtins/builtins_regexp.cpp index 3bbf9b9ac55b39a5991fa2e3c7741a31a53cdc21..26f925b62464788b81afa3aa08830f2111cf435f 100644 --- a/runtime/builtins/builtins_regexp.cpp +++ b/runtime/builtins/builtins_regexp.cpp @@ -15,17 +15,18 @@ #include "plugins/ecmascript/runtime/builtins/builtins_regexp.h" #include +#include "assert_gc_scope.h" #include "plugins/ecmascript/runtime/ecma_string-inl.h" #include "plugins/ecmascript/runtime/ecma_vm.h" #include "plugins/ecmascript/runtime/global_env.h" -#include "plugins/ecmascript/runtime/internal_call_params.h" #include "plugins/ecmascript/runtime/interpreter/fast_runtime_stub-inl.h" +#include "plugins/ecmascript/runtime/interpreter/interpreter.h" #include "plugins/ecmascript/runtime/js_array.h" #include "plugins/ecmascript/runtime/js_function.h" #include "plugins/ecmascript/runtime/js_hclass.h" -#include "plugins/ecmascript/runtime/js_invoker.h" #include "plugins/ecmascript/runtime/js_object-inl.h" #include "plugins/ecmascript/runtime/js_regexp.h" +#include "plugins/ecmascript/runtime/js_regexp_iterator.h" #include "plugins/ecmascript/runtime/js_tagged_value-inl.h" #include "plugins/ecmascript/runtime/object_factory.h" #include "plugins/ecmascript/runtime/regexp/regexp_parser_cache.h" @@ -42,26 +43,27 @@ JSTaggedValue BuiltinsRegExp::RegExpConstructor(EcmaRuntimeCallInfo *argv) JSHandle newTargetTemp = GetNewTarget(argv); JSHandle pattern = GetCallArg(argv, 0); JSHandle flags = GetCallArg(argv, 1); - ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); // 1. Let patternIsRegExp be IsRegExp(pattern). bool patternIsRegExp = JSObject::IsRegExp(thread, pattern); // 2. ReturnIfAbrupt(patternIsRegExp). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); // 3. If NewTarget is not undefined, let newTarget be NewTarget. JSHandle newTarget; + const GlobalEnvConstants *globalConst = thread->GlobalConstants(); if (!newTargetTemp->IsUndefined()) { newTarget = newTargetTemp; } else { auto ecmaVm = thread->GetEcmaVM(); JSHandle env = ecmaVm->GetGlobalEnv(); - const GlobalEnvConstants *globalConst = thread->GlobalConstants(); + // disable gc + [[maybe_unused]] DisallowGarbageCollection no_gc; // 4.a Let newTarget be the active function object. newTarget = env->GetRegExpFunction(); JSHandle constructorString = globalConst->GetHandledConstructorString(); // 4.b If patternIsRegExp is true and flags is undefined if (patternIsRegExp && flags->IsUndefined()) { // 4.b.i Let patternConstructor be Get(pattern, "constructor"). - JSTaggedValue patternConstructor = FastRuntimeStub::FastGetPropertyByName( + JSTaggedValue patternConstructor = FastRuntimeStub::FastGetPropertyByValue( thread, pattern.GetTaggedValue(), constructorString.GetTaggedValue()); // 4.b.ii ReturnIfAbrupt(patternConstructor). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); @@ -92,8 +94,10 @@ JSTaggedValue BuiltinsRegExp::RegExpConstructor(EcmaRuntimeCallInfo *argv) } // 6. Else if patternIsRegExp is true } else if (patternIsRegExp) { - JSHandle sourceString(factory->NewFromCanBeCompressString("source")); - JSHandle flagsString(factory->NewFromCanBeCompressString("flags")); + JSHandle sourceString(globalConst->GetHandledSourceString()); + JSHandle flagsString(globalConst->GetHandledFlagsString()); + // disable gc + [[maybe_unused]] DisallowGarbageCollection noGc; // 6.a Let P be Get(pattern, "source"). patternTemp = JSObject::GetProperty(thread, pattern, sourceString).GetValue(); // 6.b ReturnIfAbrupt(P). @@ -127,6 +131,26 @@ JSTaggedValue BuiltinsRegExp::RegExpConstructor(EcmaRuntimeCallInfo *argv) return JSTaggedValue(result); } +static bool ExecCachingAllowed(JSThread *thread, JSHandle &thisVal) +{ + const GlobalEnvConstants *globalConst = thread->GlobalConstants(); + + if (!thisVal->IsJSObject()) { + return false; + } + JSObject *thisObj = JSObject::Cast(thisVal->GetTaggedObject()); + JSHClass *thisClass = thisObj->GetJSHClass(); + + JSHandle regexFn(thread->GetEcmaVM()->GetGlobalEnv()->GetRegExpFunction()); + if (JSTaggedValue(thisClass) != regexFn->GetProtoOrDynClass()) { + return false; + } + + JSTaggedValue regexProto = + JSHClass::Cast(globalConst->GetHandledJSRegExpClass()->GetTaggedObject())->GetPrototype(); + return thisClass->GetPrototype() == regexProto; +} + // prototype // 20.2.5.2 JSTaggedValue BuiltinsRegExp::Exec(EcmaRuntimeCallInfo *argv) @@ -154,13 +178,14 @@ JSTaggedValue BuiltinsRegExp::Exec(EcmaRuntimeCallInfo *argv) THROW_TYPE_ERROR_AND_RETURN(thread, "this does not have [[RegExpMatcher]]", JSTaggedValue::Exception()); } - bool isCached = true; + bool useCache = ExecCachingAllowed(thread, thisObj); JSHandle cacheTable(thread->GetEcmaVM()->GetRegExpCache()); - if (cacheTable->GetLargeStrCount() == 0U || cacheTable->GetConflictCount() != 0U) { - isCached = false; + if (cacheTable->GetLargeStrCount() == 0 || cacheTable->GetConflictCount() == 0) { + useCache = false; } + // 6. Return RegExpBuiltinExec(R, S). - JSTaggedValue result = RegExpBuiltinExec(thread, thisObj, string, isCached); + JSTaggedValue result = RegExpBuiltinExec(thread, thisObj, string, useCache); return JSTaggedValue(result); } @@ -208,8 +233,9 @@ JSTaggedValue BuiltinsRegExp::ToString(EcmaRuntimeCallInfo *argv) THROW_TYPE_ERROR_AND_RETURN(thread, "this is not Object", JSTaggedValue::Exception()); } ObjectFactory *factory = ecmaVm->GetFactory(); - JSHandle sourceString(factory->NewFromCanBeCompressString("source")); - JSHandle flagsString(factory->NewFromCanBeCompressString("flags")); + const GlobalEnvConstants *globalConstants = thread->GlobalConstants(); + JSHandle sourceString(globalConstants->GetHandledSourceString()); + JSHandle flagsString(globalConstants->GetHandledFlagsString()); // 3. Let pattern be ToString(Get(R, "source")). JSHandle getSource(JSObject::GetProperty(thread, thisObj, sourceString).GetValue()); JSHandle getFlags(JSObject::GetProperty(thread, thisObj, flagsString).GetValue()); @@ -220,44 +246,13 @@ JSTaggedValue BuiltinsRegExp::ToString(EcmaRuntimeCallInfo *argv) JSHandle flagsStrHandle = JSTaggedValue::ToString(thread, getFlags); // 4. ReturnIfAbrupt(flags). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); - JSHandle slashStr = factory->NewFromCanBeCompressString("/"); + JSHandle slashStr = JSHandle::Cast(globalConstants->GetHandledBackslashString()); // 7. Let result be the String value formed by concatenating "/", pattern, and "/", and flags. JSHandle tempStr = factory->ConcatFromString(slashStr, sourceStrHandle); JSHandle resultTemp = factory->ConcatFromString(tempStr, slashStr); return factory->ConcatFromString(resultTemp, flagsStrHandle).GetTaggedValue(); } -JSHandle ConcatFlags(JSThread *thread, const JSHandle &obj, - const JSHandle &string, const char *name) -{ - auto ecmaVm = thread->GetEcmaVM(); - ObjectFactory *factory = ecmaVm->GetFactory(); - - JSHandle nameString(factory->NewFromString(name)); - bool exist = JSObject::GetProperty(thread, obj, nameString).GetValue()->ToBoolean(); - // ReturnIfAbrupt - RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSTaggedValue, thread); - if (exist) { - JSHandle temp = factory->GetEmptyString(); - if (PandaString("global") == name) { - temp = factory->NewFromString("g"); - } else if (PandaString("ignoreCase") == name) { - temp = factory->NewFromString("i"); - } else if (PandaString("multiline") == name) { - temp = factory->NewFromString("m"); - } else if (PandaString("dotAll") == name) { - temp = factory->NewFromString("s"); - } else if (PandaString("unicode") == name) { - temp = factory->NewFromString("u"); - } else if (PandaString("sticky") == name) { - temp = factory->NewFromString("y"); - } - JSHandle thisString(string); - return JSHandle(factory->ConcatFromString(thisString, temp)); - } - return JSHandle(string); -} - // 20.2.5.3 JSTaggedValue BuiltinsRegExp::GetFlags(EcmaRuntimeCallInfo *argv) { @@ -273,6 +268,7 @@ JSTaggedValue BuiltinsRegExp::GetFlags(EcmaRuntimeCallInfo *argv) } // 3. Let result be the empty String. // 4. ~ 19. + if (JSHandle::Cast(thisObj)->IsJSRegExp()) { uint8_t flagsBits = static_cast(JSRegExp::Cast(thisObj->GetTaggedObject())->GetOriginalFlags().GetInt()); @@ -280,14 +276,31 @@ JSTaggedValue BuiltinsRegExp::GetFlags(EcmaRuntimeCallInfo *argv) } ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); - JSHandle result(factory->GetEmptyString()); - result = ConcatFlags(thread, thisObj, result, "global"); - result = ConcatFlags(thread, thisObj, result, "ignoreCase"); - result = ConcatFlags(thread, thisObj, result, "multiline"); - result = ConcatFlags(thread, thisObj, result, "dotaAll"); - result = ConcatFlags(thread, thisObj, result, "unicode"); - result = ConcatFlags(thread, thisObj, result, "sticky"); - return JSTaggedValue(static_cast(result->GetHeapObject())); + + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) + std::array flagsStr; // six flags + \0 + { + char *flagsCur = flagsStr.data(); + + auto addFlagVal = [&](char const *propStr, char const flagChar) { + JSHandle prop(factory->NewFromString(propStr)); + JSHandle getResult(JSObject::GetProperty(thread, thisObj, prop).GetValue()); + if (getResult->ToBoolean()) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + *flagsCur++ = flagChar; + } + }; + addFlagVal("global", 'g'); + addFlagVal("ignoreCase", 'i'); + addFlagVal("multiline", 'm'); + addFlagVal("dotAll", 's'); + addFlagVal("unicode", 'u'); + addFlagVal("sticky", 'y'); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + *flagsCur = '\0'; + } + + return factory->NewFromString(flagsStr.data()).GetTaggedValue(); } // 20.2.5.4 @@ -403,10 +416,10 @@ JSTaggedValue BuiltinsRegExp::Match(EcmaRuntimeCallInfo *argv) // 3. Let S be ToString(string) JSHandle inputString = GetCallArg(argv, 0); JSHandle stringHandle = JSTaggedValue::ToString(thread, inputString); - bool isCached = true; + bool useCache = true; JSHandle cacheTable(thread->GetEcmaVM()->GetRegExpCache()); - if (cacheTable->GetLargeStrCount() == 0U || (cacheTable->GetConflictCount()) != 0U) { - isCached = false; + if (cacheTable->GetLargeStrCount() == 0 || cacheTable->GetConflictCount() == 0) { + useCache = false; } // 4. ReturnIfAbrupt(string). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); @@ -415,20 +428,17 @@ JSTaggedValue BuiltinsRegExp::Match(EcmaRuntimeCallInfo *argv) // 2. If Type(rx) is not Object, throw a TypeError exception. THROW_TYPE_ERROR_AND_RETURN(thread, "this is not Object", JSTaggedValue::Exception()); } - // 5. Let global be ToBoolean(Get(rx, "global")). const GlobalEnvConstants *globalConst = thread->GlobalConstants(); + + useCache &= ExecCachingAllowed(thread, thisObj); + + // 5. Let global be ToBoolean(Get(rx, "global")). JSHandle global = globalConst->GetHandledGlobalString(); JSTaggedValue globalValue = - FastRuntimeStub::FastGetPropertyByName(thread, thisObj.GetTaggedValue(), global.GetTaggedValue()); + FastRuntimeStub::FastGetPropertyByValue(thread, thisObj.GetTaggedValue(), global.GetTaggedValue()); // 6. ReturnIfAbrupt(global). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); - bool isGlobal = globalValue.ToBoolean(); - // 7. If global is false, then - if (!isGlobal) { - // a. Return RegExpExec(rx, S). - JSTaggedValue result = RegExpExec(thread, thisObj, string, isCached); - return JSTaggedValue(result); - } + JSHandle regexpObj(thisObj); JSMutableHandle pattern(thread, JSTaggedValue::Undefined()); JSMutableHandle flags(thread, JSTaggedValue::Undefined()); @@ -436,28 +446,42 @@ JSTaggedValue BuiltinsRegExp::Match(EcmaRuntimeCallInfo *argv) pattern.Update(regexpObj->GetOriginalSource()); flags.Update(regexpObj->GetOriginalFlags()); } - if (isCached) { + bool isGlobal = globalValue.ToBoolean(); + // 7. If global is false, then + if (!isGlobal) { + // a. Return RegExpExec(rx, S). + if (useCache) { + JSTaggedValue cacheResult = cacheTable->FindCachedResult(thread, pattern, flags, inputString, + RegExpExecResultCache::EXEC_TYPE, thisObj); + if (cacheResult != JSTaggedValue::Undefined()) { + return cacheResult; + } + } + JSTaggedValue result = RegExpExec(thread, thisObj, string, useCache); + return JSTaggedValue(result); + } + + if (useCache) { JSTaggedValue cacheResult = cacheTable->FindCachedResult(thread, pattern, flags, inputString, RegExpExecResultCache::MATCH_TYPE, thisObj); if (cacheResult != JSTaggedValue::Undefined()) { return cacheResult; } } + // 8. Else global is true // a. Let fullUnicode be ToBoolean(Get(rx, "unicode")). JSHandle unicode = globalConst->GetHandledUnicodeString(); JSTaggedValue uincodeValue = - FastRuntimeStub::FastGetProperty(thread, thisObj.GetTaggedValue(), unicode.GetTaggedValue()); + FastRuntimeStub::FastGetPropertyByValue(thread, thisObj.GetTaggedValue(), unicode.GetTaggedValue()); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); bool fullUnicode = uincodeValue.ToBoolean(); // b. ReturnIfAbrupt(fullUnicode) RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); // c. Let setStatus be Set(rx, "lastIndex", 0, true). JSHandle lastIndexString(globalConst->GetHandledLastIndexString()); - JSHandle value(thread, JSTaggedValue(0)); - FastRuntimeStub::FastSetProperty(thread, thisObj.GetTaggedValue(), lastIndexString.GetTaggedValue(), - JSTaggedValue(0), true); - JSObject::SetProperty(thread, thisObj, lastIndexString, value, true); + FastRuntimeStub::FastSetPropertyByValue(thread, thisObj.GetTaggedValue(), lastIndexString.GetTaggedValue(), + JSTaggedValue(0)); // d. ReturnIfAbrupt(setStatus). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); // e. Let A be ArrayCreate(0). @@ -468,7 +492,8 @@ JSTaggedValue BuiltinsRegExp::Match(EcmaRuntimeCallInfo *argv) // g. Repeat, while (true) { // i. Let result be RegExpExec(rx, S). - result.Update(RegExpExec(thread, thisObj, string, isCached)); + result.Update(RegExpExec(thread, thisObj, string, useCache)); + // ii. ReturnIfAbrupt(result). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); // iii. If result is null, then @@ -477,7 +502,7 @@ JSTaggedValue BuiltinsRegExp::Match(EcmaRuntimeCallInfo *argv) if (resultNum == 0) { return JSTaggedValue::Null(); } - if (isCached) { + if (useCache) { RegExpExecResultCache::AddResultInCache(thread, cacheTable, pattern, flags, inputString, JSHandle(array), RegExpExecResultCache::MATCH_TYPE, 0); @@ -488,8 +513,9 @@ JSTaggedValue BuiltinsRegExp::Match(EcmaRuntimeCallInfo *argv) // iv. Else result is not null, // 1. Let matchStr be ToString(Get(result, "0")). JSHandle zeroString = globalConst->GetHandledZeroString(); - JSHandle matchStr( - thread, FastRuntimeStub::FastGetProperty(thread, result.GetTaggedValue(), zeroString.GetTaggedValue())); + JSTaggedValue matchVal = + FastRuntimeStub::FastGetPropertyByValue(thread, result.GetTaggedValue(), zeroString.GetTaggedValue()); + JSHandle matchStr(thread, matchVal); JSHandle matchString = JSTaggedValue::ToString(thread, matchStr); // 2. ReturnIfAbrupt(matchStr). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); @@ -499,28 +525,104 @@ JSTaggedValue BuiltinsRegExp::Match(EcmaRuntimeCallInfo *argv) // 5. If matchStr is the empty String, then if (JSTaggedValue::ToString(thread, matchValue)->GetLength() == 0) { // a. Let thisIndex be ToLength(Get(rx, "lastIndex")). - JSHandle lastIndexHandle( - thread, - FastRuntimeStub::FastGetProperty(thread, thisObj.GetTaggedValue(), lastIndexString.GetTaggedValue())); + JSTaggedValue lastIndex = FastRuntimeStub::FastGetPropertyByValue(thread, thisObj.GetTaggedValue(), + lastIndexString.GetTaggedValue()); + JSHandle lastIndexHandle(thread, lastIndex); JSTaggedNumber thisIndex = JSTaggedValue::ToLength(thread, lastIndexHandle); // b. ReturnIfAbrupt(thisIndex). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); // c. Let nextIndex be AdvanceStringIndex(S, thisIndex, fullUnicode). // d. Let setStatus be Set(rx, "lastIndex", nextIndex, true). - auto nextIndex = JSTaggedValue(AdvanceStringIndex(thread, string, thisIndex.GetNumber(), fullUnicode)); - FastRuntimeStub::FastSetProperty(thread, thisObj.GetTaggedValue(), lastIndexString.GetTaggedValue(), - nextIndex, true); + auto nextIndex = JSTaggedValue(AdvanceStringIndex(string, thisIndex.GetNumber(), fullUnicode)); + FastRuntimeStub::FastSetPropertyByValue(thread, thisObj.GetTaggedValue(), lastIndexString.GetTaggedValue(), + nextIndex); // e. ReturnIfAbrupt(setStatus). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); } - // 6. Increment n. + // 6. Increase n. resultNum++; } } +JSTaggedValue BuiltinsRegExp::MatchAll(EcmaRuntimeCallInfo *argv) +{ + ASSERT(argv); + JSThread *thread = argv->GetThread(); + BUILTINS_API_TRACE(thread, RegExp, MatchAll); + [[maybe_unused]] EcmaHandleScope handleScope(thread); + + // 1. Let R be the this value. + // 2. If Type(R) is not Object, throw a TypeError exception. + JSHandle thisObj = GetThis(argv); + auto ecmaVm = thread->GetEcmaVM(); + if (!thisObj->IsECMAObject()) { + THROW_TYPE_ERROR_AND_RETURN(thread, "this is not Object", JSTaggedValue::Exception()); + } + + // 3. Let S be ? ToString(string). + JSHandle inputString = GetCallArg(argv, 0); + JSHandle stringHandle = JSTaggedValue::ToString(thread, inputString); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + + // 4. Let C be ? SpeciesConstructor(R, %RegExp%). + JSHandle defaultConstructor = ecmaVm->GetGlobalEnv()->GetRegExpFunction(); + JSHandle objHandle(thisObj); + JSHandle constructor = JSObject::SpeciesConstructor(thread, objHandle, defaultConstructor); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + + const GlobalEnvConstants *globalConstants = thread->GlobalConstants(); + // 5. Let flags be ? ToString(? Get(R, "flags")). + JSHandle flagsString(globalConstants->GetHandledFlagsString()); + JSHandle getFlags(JSObject::GetProperty(thread, thisObj, flagsString).GetValue()); + JSHandle flagsStrHandle = JSTaggedValue::ToString(thread, getFlags); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + + // 6. Let matcher be ? Construct(C, « R, flags »). + JSHandle undefined = globalConstants->GetHandledUndefined(); + + InternalCallParams *arguments = thread->GetInternalCallParams(); + arguments->MakeArgv(thisObj.GetTaggedValue(), flagsStrHandle.GetTaggedValue()); + + JSTaggedValue taggedMatcher = JSFunction::Construct(thread, constructor, 2, arguments->GetArgv(), undefined); + JSHandle matcherHandle(thread, taggedMatcher); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + + // 7. Let lastIndex be ? ToLength(? Get(R, "lastIndex")). + JSHandle lastIndexString(globalConstants->GetHandledLastIndexString()); + JSHandle getLastIndex(JSObject::GetProperty(thread, thisObj, lastIndexString).GetValue()); + JSTaggedNumber thisLastIndex = JSTaggedValue::ToLength(thread, getLastIndex); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + + // 8. Perform ? Set(matcher, "lastIndex", lastIndex, true). + FastRuntimeStub::FastSetPropertyByValue(thread, matcherHandle.GetTaggedValue(), lastIndexString.GetTaggedValue(), + thisLastIndex); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + + // 9. If flags contains "g", let global be true. + // 10. Else, let global be false. + JSHandle gString(globalConstants->GetHandledGString()); + bool global = false; + if (base::StringHelper::Contains(*flagsStrHandle, *gString)) { + global = true; + } + + // 11. If flags contains "u", let fullUnicode be true. + // 12. Else, let fullUnicode be false. + JSHandle uString(globalConstants->GetHandledUString()); + bool fullUnicode = false; + if (base::StringHelper::Contains(*flagsStrHandle, *uString)) { + fullUnicode = true; + } + + // 13. Return ! CreateRegExpStringIterator(matcher, S, global, fullUnicode). + return JSRegExpIterator::CreateRegExpStringIterator(thread, matcherHandle, stringHandle, global, fullUnicode) + .GetTaggedValue(); +} + JSTaggedValue BuiltinsRegExp::RegExpReplaceFast(JSThread *thread, JSHandle ®exp, JSHandle inputString, uint32_t inputLength) { + ASSERT(regexp->IsJSRegExp()); ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); // get bytecode JSTaggedValue bufferData = JSRegExp::Cast(regexp->GetTaggedObject())->GetByteCodeBuffer(); @@ -532,14 +634,14 @@ JSTaggedValue BuiltinsRegExp::RegExpReplaceFast(JSThread *thread, JSHandle lastIndexHandle(thread->GlobalConstants()->GetHandledLastIndexString()); uint32_t lastIndex; JSHandle regexpHandle(regexp); - bool isCached = false; + bool useCache = false; if ((flags & (RegExpParser::FLAG_STICKY | RegExpParser::FLAG_GLOBAL)) == 0) { lastIndex = 0; } else { JSTaggedValue thisIndex = - FastRuntimeStub::FastGetProperty(thread, regexp.GetTaggedValue(), lastIndexHandle.GetTaggedValue()); + FastRuntimeStub::FastGetPropertyByValue(thread, regexp.GetTaggedValue(), lastIndexHandle.GetTaggedValue()); if (thisIndex.IsInt()) { - lastIndex = thisIndex.GetInt(); + lastIndex = static_cast(thisIndex.GetInt()); } else { JSHandle thisIndexHandle(thread, thisIndex); lastIndex = JSTaggedValue::ToLength(thread, thisIndexHandle).GetNumber(); @@ -547,13 +649,11 @@ JSTaggedValue BuiltinsRegExp::RegExpReplaceFast(JSThread *thread, JSHandleGlobalConstants(); JSHandle tagInputString = JSHandle::Cast(inputString); - JSMutableHandle pattern(thread, JSTaggedValue::Undefined()); - JSMutableHandle flagsBits(thread, JSTaggedValue::Undefined()); - if (regexp->IsJSRegExp()) { - pattern.Update(regexpHandle->GetOriginalSource()); - flagsBits.Update(regexpHandle->GetOriginalFlags()); - } + JSHandle pattern(thread, regexpHandle->GetOriginalSource()); + JSHandle flagsBits(thread, regexpHandle->GetOriginalFlags()); + JSHandle cacheTable(thread->GetEcmaVM()->GetRegExpCache()); uint32_t length = inputString->GetLength(); uint32_t largeStrCount = cacheTable->GetLargeStrCount(); @@ -564,12 +664,13 @@ JSTaggedValue BuiltinsRegExp::RegExpReplaceFast(JSThread *thread, JSHandleSetStrLenThreshold(thread, MIN_REPLACE_STRING_LENGTH); } - if (lastIndex == 0 && length > cacheTable->GetStrLenThreshold()) { - isCached = true; + if (length > cacheTable->GetStrLenThreshold()) { + useCache = ExecCachingAllowed(thread, regexp); } - if (isCached) { - JSTaggedValue cacheResult = cacheTable->FindCachedResult(thread, pattern, flagsBits, tagInputString, - RegExpExecResultCache::REPLACE_TYPE, regexp); + if (useCache) { + JSTaggedValue cacheResult = + cacheTable->FindCachedResult(thread, pattern, flagsBits, tagInputString, + RegExpExecResultCache::REPLACE_TYPE, regexp, globalConst->GetEmptyString()); if (cacheResult != JSTaggedValue::Undefined()) { return cacheResult; } @@ -577,7 +678,6 @@ JSTaggedValue BuiltinsRegExp::RegExpReplaceFast(JSThread *thread, JSHandle lastIndexValue(thread, JSTaggedValue(lastIndex)); // 12. Let done be false. // 13. Repeat, while done is false @@ -602,10 +702,10 @@ JSTaggedValue BuiltinsRegExp::RegExpReplaceFast(JSThread *thread, JSHandleIsUtf16() && ((flags & RegExpParser::FLAG_UTF16) != 0U); - endIndex = AdvanceStringIndex(thread, tagInputString, endIndex, unicode); + bool unicode = inputString->IsUtf16() && ((flags & RegExpParser::FLAG_UTF16) != 0); + endIndex = AdvanceStringIndex(tagInputString, endIndex, unicode); } lastIndex = endIndex; } resultString += base::StringHelper::SubString(inputString, nextPosition, inputLength - nextPosition); auto resultValue = factory->NewFromStdString(resultString); - if (isCached) { - RegExpExecResultCache::AddResultInCache(thread, cacheTable, pattern, flagsBits, tagInputString, - JSHandle(resultValue), - RegExpExecResultCache::REPLACE_TYPE, lastIndex); + if (useCache) { + RegExpExecResultCache::AddResultInCache( + thread, cacheTable, pattern, flagsBits, tagInputString, JSHandle(resultValue), + RegExpExecResultCache::REPLACE_TYPE, lastIndex, globalConst->GetEmptyString()); } return resultValue.GetTaggedValue(); } @@ -658,14 +758,14 @@ JSTaggedValue BuiltinsRegExp::Replace(EcmaRuntimeCallInfo *argv) // 3. Let S be ToString(string). JSHandle string = GetCallArg(argv, 0); JSHandle inputReplaceValue = GetCallArg(argv, 1); - JSHandle srcString = JSTaggedValue::ToString(thread, string); + JSHandle srPandaString = JSTaggedValue::ToString(thread, string); const GlobalEnvConstants *globalConst = thread->GlobalConstants(); // 4. ReturnIfAbrupt(S). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); - JSHandle inputStr = JSHandle::Cast(srcString); + JSHandle inputStr = JSHandle::Cast(srPandaString); // 5. Let lengthS be the number of code unit elements in S. - uint32_t length = srcString->GetLength(); + uint32_t length = srPandaString->GetLength(); // 6. Let functionalReplace be IsCallable(replaceValue). bool functionalReplace = inputReplaceValue->IsCallable(); JSHandle replaceValueHandle; @@ -678,33 +778,64 @@ JSTaggedValue BuiltinsRegExp::Replace(EcmaRuntimeCallInfo *argv) ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); JSHandle global = globalConst->GetHandledGlobalString(); JSTaggedValue globalValue = - FastRuntimeStub::FastGetProperty(thread, thisObj.GetTaggedValue(), global.GetTaggedValue()); + FastRuntimeStub::FastGetPropertyByValue(thread, thisObj.GetTaggedValue(), global.GetTaggedValue()); // 9. ReturnIfAbrupt(global). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); bool isGlobal = globalValue.ToBoolean(); + // 10. If global is true, then bool fullUnicode = false; if (isGlobal) { // a. Let fullUnicode be ToBoolean(Get(rx, "unicode")). JSHandle unicode = globalConst->GetHandledUnicodeString(); JSTaggedValue fullUnicodeTag = - FastRuntimeStub::FastGetProperty(thread, thisObj.GetTaggedValue(), unicode.GetTaggedValue()); + FastRuntimeStub::FastGetPropertyByValue(thread, thisObj.GetTaggedValue(), unicode.GetTaggedValue()); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); fullUnicode = fullUnicodeTag.ToBoolean(); // b. ReturnIfAbrupt(fullUnicode). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); // c. Let setStatus be Set(rx, "lastIndex", 0, true). - FastRuntimeStub::FastSetProperty(thread, thisObj.GetTaggedValue(), lastIndex.GetTaggedValue(), JSTaggedValue(0), - true); + FastRuntimeStub::FastSetPropertyByValue(thread, thisObj.GetTaggedValue(), lastIndex.GetTaggedValue(), + JSTaggedValue(0)); // d. ReturnIfAbrupt(setStatus). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); } - if (isGlobal && !functionalReplace && (replaceValueHandle->GetLength() == 0) && thisObj->IsJSRegExp()) { + // Add cache for regexp replace + bool useCache = false; + JSMutableHandle pattern(thread, JSTaggedValue::Undefined()); + JSMutableHandle flagsBits(thread, JSTaggedValue::Undefined()); + JSHandle cacheTable(thread->GetEcmaVM()->GetRegExpCache()); + if (isGlobal && !functionalReplace && thisObj->IsJSRegExp()) { JSHClass *hclass = JSHandle::Cast(thisObj)->GetJSHClass(); JSHClass *originHClass = JSHClass::Cast(globalConst->GetJSRegExpClass().GetTaggedObject()); if (hclass == originHClass) { - return RegExpReplaceFast(thread, thisObj, srcString, length); + if (replaceValueHandle->GetLength() == 0) { + return RegExpReplaceFast(thread, thisObj, srPandaString, length); + } + JSHandle regexpHandle(thisObj); + if (regexpHandle->IsJSRegExp()) { + pattern.Update(regexpHandle->GetOriginalSource()); + flagsBits.Update(regexpHandle->GetOriginalFlags()); + } + uint32_t strLength = replaceValueHandle->GetLength(); + uint32_t largeStrCount = cacheTable->GetLargeStrCount(); + if (largeStrCount != 0) { + if (strLength > MIN_REPLACE_STRING_LENGTH) { + cacheTable->SetLargeStrCount(thread, --largeStrCount); + } + } else { + cacheTable->SetStrLenThreshold(thread, MIN_REPLACE_STRING_LENGTH); + } + if (strLength > cacheTable->GetStrLenThreshold()) { + useCache = ExecCachingAllowed(thread, thisObj); + JSTaggedValue cacheResult = cacheTable->FindCachedResult(thread, pattern, flagsBits, string, + RegExpExecResultCache::REPLACE_TYPE, thisObj, + inputReplaceValue.GetTaggedValue()); + if (cacheResult != JSTaggedValue::Undefined()) { + return cacheResult; + } + } } } @@ -718,7 +849,7 @@ JSTaggedValue BuiltinsRegExp::Replace(EcmaRuntimeCallInfo *argv) JSMutableHandle execResult(thread, JSTaggedValue(0)); for (;;) { // a. Let result be RegExpExec(rx, S). - execResult.Update(RegExpExec(thread, thisObj, inputStr, false)); + execResult.Update(RegExpExec(thread, thisObj, inputStr, useCache)); // b. ReturnIfAbrupt(result). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); // c. If result is null, set done to true. @@ -733,30 +864,33 @@ JSTaggedValue BuiltinsRegExp::Replace(EcmaRuntimeCallInfo *argv) break; } // iii. Else, 1. Let matchStr be ToString(Get(result, "0")). - JSHandle getMatch( - thread, FastRuntimeStub::FastGetProperty(thread, execResult.GetTaggedValue(), matchedStr.GetTaggedValue())); + JSTaggedValue getMatchVal = + FastRuntimeStub::FastGetPropertyByValue(thread, execResult.GetTaggedValue(), matchedStr.GetTaggedValue()); + JSHandle getMatch(thread, getMatchVal); JSHandle matchString = JSTaggedValue::ToString(thread, getMatch); // 2. ReturnIfAbrupt(matchStr). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); // 3. If matchStr is the empty String, then if (matchString->GetLength() == 0) { // a. Let thisIndex be ToLength(Get(rx, "lastIndex")). - JSHandle thisIndexHandle( - thread, FastRuntimeStub::FastGetProperty(thread, thisObj.GetTaggedValue(), lastIndex.GetTaggedValue())); + JSTaggedValue thisIndexVal = + FastRuntimeStub::FastGetPropertyByValue(thread, thisObj.GetTaggedValue(), lastIndex.GetTaggedValue()); + JSHandle thisIndexHandle(thread, thisIndexVal); uint32_t thisIndex = 0; if (thisIndexHandle->IsInt()) { - thisIndex = thisIndexHandle->GetInt(); + thisIndex = static_cast(thisIndexHandle->GetInt()); } else { - thisIndex = JSTaggedValue::ToLength(thread, thisIndexHandle).GetNumber(); + thisIndexVal = JSTaggedValue::ToLength(thread, thisIndexHandle); // b. ReturnIfAbrupt(thisIndex). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + thisIndex = thisIndexVal.GetNumber(); } // c. Let nextIndex be AdvanceStringIndex(S, thisIndex, fullUnicode). - uint32_t nextIndex = AdvanceStringIndex(thread, inputStr, thisIndex, fullUnicode); + uint32_t nextIndex = AdvanceStringIndex(inputStr, thisIndex, fullUnicode); nextIndexHandle.Update(JSTaggedValue(nextIndex)); // d. Let setStatus be Set(rx, "lastIndex", nextIndex, true). - FastRuntimeStub::FastSetProperty(thread, thisObj.GetTaggedValue(), lastIndex.GetTaggedValue(), - nextIndexHandle.GetTaggedValue(), true); + FastRuntimeStub::FastSetPropertyByValue(thread, thisObj.GetTaggedValue(), lastIndex.GetTaggedValue(), + nextIndexHandle.GetTaggedValue()); // e. ReturnIfAbrupt(setStatus). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); } @@ -774,26 +908,29 @@ JSTaggedValue BuiltinsRegExp::Replace(EcmaRuntimeCallInfo *argv) resultValues.Update(FastRuntimeStub::FastGetPropertyByIndex(thread, resultsList.GetTaggedValue(), i)); // a. Let nCaptures be ToLength(Get(result, "length")). JSHandle lengthHandle = globalConst->GetHandledLengthString(); - ncapturesHandle.Update( - FastRuntimeStub::FastGetProperty(thread, resultValues.GetTaggedValue(), lengthHandle.GetTaggedValue())); + ncapturesHandle.Update(FastRuntimeStub::FastGetPropertyByValue(thread, resultValues.GetTaggedValue(), + lengthHandle.GetTaggedValue())); uint32_t ncaptures = JSTaggedValue::ToUint32(thread, ncapturesHandle); // b. ReturnIfAbrupt(nCaptures). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); // c. Let nCaptures be max(nCaptures − 1, 0). ncaptures = std::max((ncaptures - 1), 0); // d. Let matched be ToString(Get(result, "0")). - getMatchString = JSObject::GetProperty(thread, resultValues, matchedStr).GetValue(); + JSTaggedValue value = FastRuntimeStub::GetPropertyByIndex(thread, resultValues.GetTaggedValue(), 0); + getMatchString = JSHandle(thread, value); JSHandle matchString = JSTaggedValue::ToString(thread, getMatchString); // e. ReturnIfAbrupt(matched). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); // f. Let matchLength be the number of code units in matched. uint32_t matchLength = matchString->GetLength(); // g. Let position be ToInteger(Get(result, "index")). - JSHandle resultIndex(factory->NewFromCanBeCompressString("index")); - JSHandle positionHandle = JSObject::GetProperty(thread, resultValues, resultIndex).GetValue(); + JSHandle resultIndex = globalConst->GetHandledIndexString(); + JSTaggedValue positionTag = FastRuntimeStub::FastGetPropertyByValue(thread, resultValues.GetTaggedValue(), + resultIndex.GetTaggedValue()); + JSHandle positionHandle(thread, positionTag); uint32_t position = 0; if (positionHandle->IsInt()) { - position = positionHandle->GetInt(); + position = static_cast(positionHandle->GetInt()); } else { position = JSTaggedValue::ToUint32(thread, positionHandle); // h. ReturnIfAbrupt(position). @@ -826,10 +963,22 @@ JSTaggedValue BuiltinsRegExp::Replace(EcmaRuntimeCallInfo *argv) // v. Let n be n+1 ++index; } + + // j. Let namedCaptures be ? Get(result, "groups"). + JSHandle groupsKey = globalConst->GetHandledGroupsString(); + JSTaggedValue named = + FastRuntimeStub::FastGetPropertyByValue(thread, resultValues.GetTaggedValue(), groupsKey.GetTaggedValue()); + JSHandle namedCaptures(thread, named); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); // m. If functionalReplace is true, then PandaString replacement; - JSHandle replacerArgs = - factory->NewTaggedArray(3 + capturesList->GetLength()); // 3: «matched, pos, and string» + int emptyArrLength = 0; + if (namedCaptures->IsUndefined()) { + emptyArrLength = 3; // 3: «matched, pos, and string» + } else { + emptyArrLength = 4; // 4: «matched, pos, string, and groups» + } + JSHandle replacerArgs = factory->NewTaggedArray(emptyArrLength + capturesList->GetLength()); if (functionalReplace) { // i. Let replacerArgs be «matched». replacerArgs->Set(thread, 0, getMatchString.GetTaggedValue()); @@ -842,12 +991,15 @@ JSTaggedValue BuiltinsRegExp::Replace(EcmaRuntimeCallInfo *argv) } replacerArgs->Set(thread, index + 1, JSTaggedValue(position)); replacerArgs->Set(thread, index + 2, inputStr.GetTaggedValue()); // 2: position of string + if (!namedCaptures->IsUndefined()) { + replacerArgs->Set(thread, index + 3, namedCaptures.GetTaggedValue()); // 3: position of groups + } // iv. Let replValue be Call(replaceValue, undefined, replacerArgs). + const int32_t argsLength = replacerArgs->GetLength(); JSHandle undefined = globalConst->GetHandledUndefined(); - ecmascript::InternalCallParams *args = thread->GetInternalCallParams(); - args->MakeArgList(*replacerArgs); + JSTaggedValue replaceResult = - JSFunction::Call(thread, inputReplaceValue, undefined, replacerArgs->GetLength(), args->GetArgv()); + JSFunction::Call(thread, inputReplaceValue, undefined, argsLength, replacerArgs->GetData()); JSHandle replValue(thread, replaceResult); // v. Let replacement be ToString(replValue). JSHandle replacementString = JSTaggedValue::ToString(thread, replValue); @@ -856,9 +1008,14 @@ JSTaggedValue BuiltinsRegExp::Replace(EcmaRuntimeCallInfo *argv) replacement = ConvertToPandaString(*replacementString, StringConvertedUsage::LOGICOPERATION); } else { // n. Else, + if (!namedCaptures->IsUndefined()) { + JSHandle namedCapturesObj = JSTaggedValue::ToObject(thread, namedCaptures); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + namedCaptures = JSHandle::Cast(namedCapturesObj); + } JSHandle replacementHandle( - thread, BuiltinsString::GetSubstitution(thread, matchString, srcString, position, capturesList, - replaceValueHandle)); + thread, BuiltinsString::GetSubstitution(thread, matchString, srPandaString, position, capturesList, + namedCaptures, replaceValueHandle)); replacement = ConvertToPandaString(EcmaString::Cast(replacementHandle->GetTaggedObject()), StringConvertedUsage::LOGICOPERATION); } @@ -867,8 +1024,8 @@ JSTaggedValue BuiltinsRegExp::Replace(EcmaRuntimeCallInfo *argv) // ii. Let accumulatedResult be the String formed by concatenating the code units of the current value // of accumulatedResult with the substring of S consisting of the code units from nextSourcePosition // (inclusive) up to position (exclusive) and with the code units of replacement. - accumulatedResult += ecmascript::base::StringHelper::SubString( - JSHandle::Cast(inputStr), nextSourcePosition, (position - nextSourcePosition)); + accumulatedResult += base::StringHelper::SubString(JSHandle::Cast(inputStr), nextSourcePosition, + (position - nextSourcePosition)); accumulatedResult += replacement; // iii. Let nextSourcePosition be position + matchLength. nextSourcePosition = position + matchLength; @@ -876,13 +1033,25 @@ JSTaggedValue BuiltinsRegExp::Replace(EcmaRuntimeCallInfo *argv) } // 17. If nextSourcePosition ≥ lengthS, return accumulatedResult. if (nextSourcePosition >= length) { - return factory->NewFromStdString(accumulatedResult).GetTaggedValue(); + JSHandle resultValue = factory->NewFromStdString(accumulatedResult); + if (useCache) { + RegExpExecResultCache::AddResultInCache( + thread, cacheTable, pattern, flagsBits, string, JSHandle(resultValue), + RegExpExecResultCache::REPLACE_TYPE, nextIndexHandle->GetInt(), inputReplaceValue.GetTaggedValue()); + } + return resultValue.GetTaggedValue(); } // 18. Return the String formed by concatenating the code units of accumulatedResult with the substring of S // consisting of the code units from nextSourcePosition (inclusive) up through the final code unit of S(inclusive). - accumulatedResult += ecmascript::base::StringHelper::SubString(JSHandle::Cast(inputStr), - nextSourcePosition, (length - nextSourcePosition)); - return factory->NewFromStdString(accumulatedResult).GetTaggedValue(); + accumulatedResult += base::StringHelper::SubString(JSHandle::Cast(inputStr), nextSourcePosition, + (length - nextSourcePosition)); + JSHandle resultValue = factory->NewFromStdString(accumulatedResult); + if (useCache) { + RegExpExecResultCache::AddResultInCache( + thread, cacheTable, pattern, flagsBits, string, JSHandle(resultValue), + RegExpExecResultCache::REPLACE_TYPE, nextIndexHandle->GetInt(), inputReplaceValue.GetTaggedValue()); + } + return resultValue.GetTaggedValue(); } // 21.2.5.9 @@ -932,8 +1101,7 @@ JSTaggedValue BuiltinsRegExp::Search(EcmaRuntimeCallInfo *argv) return JSTaggedValue(-1); } // 10. Return ? Get(result, "index"). - ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); - JSHandle index(factory->NewFromCanBeCompressString("index")); + JSHandle index(thread->GlobalConstants()->GetHandledIndexString()); return JSObject::GetProperty(thread, result, index).GetValue().GetTaggedValue(); } @@ -945,7 +1113,7 @@ JSTaggedValue BuiltinsRegExp::Split(EcmaRuntimeCallInfo *argv) BUILTINS_API_TRACE(argv->GetThread(), RegExp, Split); JSThread *thread = argv->GetThread(); [[maybe_unused]] EcmaHandleScope handleScope(thread); - bool isCached = false; + bool useCache = false; // 1. Let rx be the this value. JSHandle thisObj = GetThis(argv); auto ecmaVm = thread->GetEcmaVM(); @@ -969,7 +1137,8 @@ JSTaggedValue BuiltinsRegExp::Split(EcmaRuntimeCallInfo *argv) RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); // 7. Let flags be ToString(Get(rx, "flags")). ObjectFactory *factory = ecmaVm->GetFactory(); - JSHandle flagsString(factory->NewFromCanBeCompressString("flags")); + const GlobalEnvConstants *globalConstants = thread->GlobalConstants(); + JSHandle flagsString(globalConstants->GetHandledFlagsString()); JSHandle taggedFlags = JSObject::GetProperty(thread, thisObj, flagsString).GetValue(); JSHandle flags; @@ -982,16 +1151,16 @@ JSTaggedValue BuiltinsRegExp::Split(EcmaRuntimeCallInfo *argv) RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); // 9. If flags contains "u", let unicodeMatching be true. // 10. Else, let unicodeMatching be false. - JSHandle uStringHandle(factory->NewFromCanBeCompressString("u")); - bool unicodeMatching = ecmascript::base::StringHelper::Contains(*flags, *uStringHandle); + JSHandle uStringHandle(globalConstants->GetHandledUString()); + bool unicodeMatching = base::StringHelper::Contains(*flags, *uStringHandle); // 11. If flags contains "y", let newFlags be flags. JSHandle newFlagsHandle; - JSHandle yStringHandle(factory->NewFromCanBeCompressString("y")); - if (ecmascript::base::StringHelper::Contains(*flags, *yStringHandle)) { + JSHandle yStringHandle = JSHandle::Cast(globalConstants->GetHandledYString()); + if (base::StringHelper::Contains(*flags, *yStringHandle)) { newFlagsHandle = flags; } else { // 12. Else, let newFlags be the string that is the concatenation of flags and "y". - JSHandle yStr = factory->NewFromCanBeCompressString("y"); + JSHandle yStr = JSHandle::Cast(globalConstants->GetHandledYString()); newFlagsHandle = factory->ConcatFromString(flags, yStr); } @@ -1006,7 +1175,7 @@ JSTaggedValue BuiltinsRegExp::Split(EcmaRuntimeCallInfo *argv) } if (lim == MAX_SPLIT_LIMIT) { - isCached = true; + useCache = ExecCachingAllowed(thread, thisObj); } JSHandle regexpHandle(thisObj); @@ -1017,7 +1186,7 @@ JSTaggedValue BuiltinsRegExp::Split(EcmaRuntimeCallInfo *argv) flagsBits.Update(regexpHandle->GetOriginalFlags()); } JSHandle cacheTable(thread->GetEcmaVM()->GetRegExpCache()); - if (isCached) { + if (useCache) { JSTaggedValue cacheResult = cacheTable->FindCachedResult(thread, pattern, flagsBits, inputString, RegExpExecResultCache::SPLIT_TYPE, thisObj); if (cacheResult != JSTaggedValue::Undefined()) { @@ -1027,11 +1196,11 @@ JSTaggedValue BuiltinsRegExp::Split(EcmaRuntimeCallInfo *argv) // 13. Let splitter be Construct(C, «rx, newFlags»). JSHandle globalObject(thread, thread->GetEcmaVM()->GetGlobalEnv()->GetGlobalObject()); - JSHandle undefined(thread, JSTaggedValue::Undefined()); + JSHandle undefined = globalConstants->GetHandledUndefined(); + InternalCallParams *arguments = thread->GetInternalCallParams(); - arguments->MakeArgv(thisObj, newFlagsHandle); - JSTaggedValue taggedSplitter = - JSFunction::Construct(thread, constructor, 2, arguments->GetArgv(), undefined); // 2: two args + arguments->MakeArgv(thisObj.GetTaggedValue(), newFlagsHandle.GetTaggedValue()); + JSTaggedValue taggedSplitter = JSFunction::Construct(thread, constructor, 2, arguments->GetArgv(), undefined); // 14. ReturnIfAbrupt(splitter). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); @@ -1052,7 +1221,7 @@ JSTaggedValue BuiltinsRegExp::Split(EcmaRuntimeCallInfo *argv) // 22. If size = 0, then if (size == 0) { // a. Let z be RegExpExec(splitter, S). - JSHandle execResult(thread, RegExpExec(thread, splitter, jsString, isCached)); + JSHandle execResult(thread, RegExpExec(thread, splitter, jsString, useCache)); // b. ReturnIfAbrupt(z). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); // c. If z is not null, return A. @@ -1069,19 +1238,19 @@ JSTaggedValue BuiltinsRegExp::Split(EcmaRuntimeCallInfo *argv) uint32_t endIndex = startIndex; JSMutableHandle lastIndexvalue(thread, JSTaggedValue(endIndex)); // 24. Repeat, while q < size - JSHandle lastIndexString(thread->GlobalConstants()->GetHandledLastIndexString()); + JSHandle lastIndexString = globalConstants->GetHandledLastIndexString(); while (endIndex < size) { // a. Let setStatus be Set(splitter, "lastIndex", q, true). lastIndexvalue.Update(JSTaggedValue(endIndex)); JSObject::SetProperty(thread, splitter, lastIndexString, lastIndexvalue, true); // b. ReturnIfAbrupt(setStatus). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); - JSHandle execResult(thread, RegExpExec(thread, splitter, jsString, isCached)); + JSHandle execResult(thread, RegExpExec(thread, splitter, jsString, useCache)); // d. ReturnIfAbrupt(z). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); // e. If z is null, let q be AdvanceStringIndex(S, q, unicodeMatching). if (execResult->IsNull()) { - endIndex = AdvanceStringIndex(thread, jsString, endIndex, unicodeMatching); + endIndex = AdvanceStringIndex(jsString, endIndex, unicodeMatching); } else { // f. Else z is not null, // i. Let e be ToLength(Get(splitter, "lastIndex")). @@ -1093,13 +1262,13 @@ JSTaggedValue BuiltinsRegExp::Split(EcmaRuntimeCallInfo *argv) uint32_t lastIndex = lastIndexNumber.GetNumber(); // iii. If e = p, let q be AdvanceStringIndex(S, q, unicodeMatching). if (lastIndex == startIndex) { - endIndex = AdvanceStringIndex(thread, jsString, endIndex, unicodeMatching); + endIndex = AdvanceStringIndex(jsString, endIndex, unicodeMatching); } else { // iv. Else e != p, // 1. Let T be a String value equal to the substring of S consisting of the elements at indices p // (inclusive) through q (exclusive). - std::string stdStrT = ecmascript::base::StringHelper::SubString(JSHandle::Cast(jsString), - startIndex, (endIndex - startIndex)); + std::string stdStrT = base::StringHelper::SubString(JSHandle::Cast(jsString), startIndex, + (endIndex - startIndex)); // 2. Assert: The following call will never result in an abrupt completion. // 3. Perform CreateDataProperty(A, ToString(lengthA), T). JSHandle tValue(factory->NewFromStdString(stdStrT)); @@ -1108,7 +1277,7 @@ JSTaggedValue BuiltinsRegExp::Split(EcmaRuntimeCallInfo *argv) ++aLength; // 5. If lengthA = lim, return A. if (aLength == lim) { - if (isCached) { + if (useCache) { RegExpExecResultCache::AddResultInCache(thread, cacheTable, pattern, flagsBits, inputString, JSHandle(array), RegExpExecResultCache::SPLIT_TYPE, lastIndex); @@ -1118,7 +1287,7 @@ JSTaggedValue BuiltinsRegExp::Split(EcmaRuntimeCallInfo *argv) // 6. Let p be e. startIndex = lastIndex; // 7. Let numberOfCaptures be ToLength(Get(z, "length")). - JSHandle lengthString(factory->NewFromCanBeCompressString("length")); + JSHandle lengthString(thread->GlobalConstants()->GetHandledLengthString()); JSHandle capturesHandle = JSObject::GetProperty(thread, execResult, lengthString).GetValue(); JSTaggedNumber numberOfCapturesNumber = JSTaggedValue::ToLength(thread, capturesHandle); @@ -1143,7 +1312,7 @@ JSTaggedValue BuiltinsRegExp::Split(EcmaRuntimeCallInfo *argv) ++aLength; // f. If lengthA = lim, return A. if (aLength == lim) { - if (isCached) { + if (useCache) { RegExpExecResultCache::AddResultInCache(thread, cacheTable, pattern, flagsBits, inputString, JSHandle(array), RegExpExecResultCache::SPLIT_TYPE, lastIndex); @@ -1158,8 +1327,8 @@ JSTaggedValue BuiltinsRegExp::Split(EcmaRuntimeCallInfo *argv) } // 25. Let T be a String value equal to the substring of S consisting of the elements at indices p (inclusive) // through size (exclusive). - std::string stdStrT = ecmascript::base::StringHelper::SubString(JSHandle::Cast(jsString), startIndex, - (size - startIndex)); + std::string stdStrT = + base::StringHelper::SubString(JSHandle::Cast(jsString), startIndex, (size - startIndex)); // 26. Assert: The following call will never result in an abrupt completion. // 27. Perform CreateDataProperty(A, ToString(lengthA), t). JSHandle tValue(factory->NewFromStdString(stdStrT)); @@ -1183,17 +1352,16 @@ RegExpExecutor::MatchResult BuiltinsRegExp::Matcher(JSThread *thread, const JSHa void *dynBuf = JSNativePointer::Cast(bufferData.GetTaggedObject())->GetExternalPointer(); auto bytecodeBuffer = reinterpret_cast(dynBuf); // execute - RegExpExecutor executor; + RegExpExecutor executor {}; if (lastIndex < 0) { lastIndex = 0; } - bool ret = executor.Execute(buffer, lastIndex, length, bytecodeBuffer, isUtf16); + bool ret = executor.Execute(buffer, lastIndex, static_cast(length), bytecodeBuffer, isUtf16); RegExpExecutor::MatchResult result = executor.GetResult(thread, ret); return result; } -uint32_t BuiltinsRegExp::AdvanceStringIndex([[maybe_unused]] JSThread *thread, const JSHandle &inputStr, - uint32_t index, bool unicode) +uint32_t BuiltinsRegExp::AdvanceStringIndex(const JSHandle &inputStr, uint32_t index, bool unicode) { // 1. Assert: Type(S) is String. ASSERT(inputStr->IsString()); @@ -1245,21 +1413,20 @@ bool BuiltinsRegExp::GetFlagsInternal(JSThread *thread, const JSHandle(regexpObj->GetOriginalFlags().GetInt()); - return (flags & mask) != 0U; + return (flags & mask) != 0; } // 21.2.5.2.2 JSTaggedValue BuiltinsRegExp::RegExpBuiltinExec(JSThread *thread, const JSHandle ®exp, - const JSHandle &inputStr, bool isCached) + const JSHandle &inputStr, bool useCache) { ASSERT(JSObject::IsRegExp(thread, regexp)); ASSERT(inputStr->IsString()); - int32_t length = static_cast(inputStr->GetTaggedObject())->GetLength(); const GlobalEnvConstants *globalConst = thread->GlobalConstants(); JSHandle lastIndexHandle = globalConst->GetHandledLastIndexString(); JSTaggedValue result = - FastRuntimeStub::FastGetProperty(thread, regexp.GetTaggedValue(), lastIndexHandle.GetTaggedValue()); + FastRuntimeStub::FastGetPropertyByValue(thread, regexp.GetTaggedValue(), lastIndexHandle.GetTaggedValue()); int32_t lastIndex = 0; if (result.IsInt()) { lastIndex = result.GetInt(); @@ -1269,38 +1436,28 @@ JSTaggedValue BuiltinsRegExp::RegExpBuiltinExec(JSThread *thread, const JSHandle RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); lastIndex = lastIndexNumber.GetNumber(); } + JSHandle globalHandle = globalConst->GetHandledGlobalString(); bool global = - FastRuntimeStub::FastGetProperty(thread, regexp.GetTaggedValue(), globalHandle.GetTaggedValue()).ToBoolean(); + FastRuntimeStub::FastGetPropertyByValue(thread, regexp.GetTaggedValue(), globalHandle.GetTaggedValue()) + .ToBoolean(); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); JSHandle stickyHandle = globalConst->GetHandledStickyString(); bool sticky = - FastRuntimeStub::FastGetProperty(thread, regexp.GetTaggedValue(), stickyHandle.GetTaggedValue()).ToBoolean(); + FastRuntimeStub::FastGetPropertyByValue(thread, regexp.GetTaggedValue(), stickyHandle.GetTaggedValue()) + .ToBoolean(); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); if (!global && !sticky) { lastIndex = 0; } + JSHandle regexpObj(regexp); - JSMutableHandle pattern(thread, JSTaggedValue::Undefined()); - JSMutableHandle flags(thread, JSTaggedValue::Undefined()); - if (regexp->IsJSRegExp()) { - pattern.Update(regexpObj->GetOriginalSource()); - flags.Update(regexpObj->GetOriginalFlags()); - } + JSMutableHandle pattern(thread, regexpObj->GetOriginalSource()); + JSMutableHandle flags(thread, regexpObj->GetOriginalFlags()); + JSHandle cacheTable(thread->GetEcmaVM()->GetRegExpCache()); - if (lastIndex == 0 && isCached) { - JSTaggedValue cacheResult = - cacheTable->FindCachedResult(thread, pattern, flags, inputStr, RegExpExecResultCache::EXEC_TYPE, regexp); - if (cacheResult != JSTaggedValue::Undefined()) { - return cacheResult; - } - } - uint8_t flagsBits = static_cast(regexpObj->GetOriginalFlags().GetInt()); - JSHandle flagsValue(thread, FlagsBitsToString(thread, flagsBits)); - JSHandle flagsStr = JSTaggedValue::ToString(thread, flagsValue); - JSHandle uString(globalConst->GetHandledUString()); - [[maybe_unused]] bool fullUnicode = base::StringHelper::Contains(*flagsStr, *uString); - if (lastIndex > length) { + uint32_t length = static_cast(inputStr->GetTaggedObject())->GetLength(); + if (lastIndex > static_cast(length)) { FastRuntimeStub::FastSetPropertyByValue(thread, regexp.GetTaggedValue(), lastIndexHandle.GetTaggedValue(), JSTaggedValue(0)); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); @@ -1354,6 +1511,17 @@ JSTaggedValue BuiltinsRegExp::RegExpBuiltinExec(JSThread *thread, const JSHandle // 27. Perform CreateDataProperty(A, "0", matched_substr). JSHandle zeroValue(matchResult.captures_[0].second); JSObject::CreateDataProperty(thread, results, 0, zeroValue); + ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); + + JSHandle groupName(thread, regexpObj->GetGroupName()); + JSMutableHandle groups(thread, JSTaggedValue::Undefined()); + if (!groupName->IsUndefined()) { + JSHandle nullHandle(thread, JSTaggedValue::Null()); + JSHandle nullObj = factory->OrdinaryNewJSObjectCreate(nullHandle); + groups.Update(nullObj.GetTaggedValue()); + } + JSHandle groupsKey = globalConst->GetHandledGroupsString(); + JSObject::CreateDataProperty(thread, results, groupsKey, groups); // 28. For each integer i such that i > 0 and i <= n for (uint32_t i = 1; i < capturesSize; i++) { // a. Let capture_i be ith element of r's captures List @@ -1365,8 +1533,16 @@ JSTaggedValue BuiltinsRegExp::RegExpBuiltinExec(JSThread *thread, const JSHandle } JSHandle iValue(thread, capturedValue); JSObject::CreateDataProperty(thread, results, i, iValue); + if (!groupName->IsUndefined()) { + JSHandle groupObject = JSHandle::Cast(groups); + TaggedArray *groupArray = TaggedArray::Cast(regexpObj->GetGroupName().GetTaggedObject()); + if (groupArray->GetLength() > i - 1) { + JSHandle skey(thread, groupArray->Get(i - 1)); + JSObject::CreateDataProperty(thread, groupObject, skey, iValue); + } + } } - if (lastIndex == 0 && isCached) { + if (lastIndex == 0 && useCache) { RegExpExecResultCache::AddResultInCache(thread, cacheTable, pattern, flags, inputStr, JSHandle(results), RegExpExecResultCache::EXEC_TYPE, endIndex); @@ -1377,33 +1553,27 @@ JSTaggedValue BuiltinsRegExp::RegExpBuiltinExec(JSThread *thread, const JSHandle // 21.2.5.2.1 JSTaggedValue BuiltinsRegExp::RegExpExec(JSThread *thread, const JSHandle ®exp, - const JSHandle &inputString, bool isCached) + const JSHandle &inputString, bool useCache) { // 1. Assert: Type(R) is Object. ASSERT(regexp->IsECMAObject()); // 2. Assert: Type(S) is String. ASSERT(inputString->IsString()); // 3. Let exec be Get(R, "exec"). - JSHandle thisObj(thread, regexp->GetTaggedObject()); JSHandle inputStr = JSTaggedValue::ToString(thread, inputString); - JSHandle execHandle(thread->GlobalConstants()->GetHandledExecString()); - JSHandle exec( - thread, FastRuntimeStub::FastGetProperty(thread, thisObj.GetTaggedValue(), execHandle.GetTaggedValue())); + const GlobalEnvConstants *globalConst = thread->GlobalConstants(); + JSHandle execHandle = globalConst->GetHandledExecString(); + JSTaggedValue execVal = + FastRuntimeStub::FastGetPropertyByValue(thread, regexp.GetTaggedValue(), execHandle.GetTaggedValue()); + JSHandle exec(thread, execVal); // 4. ReturnIfAbrupt(exec). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); // 5. If IsCallable(exec) is true, then if (exec->IsCallable()) { - JSHClass *hclass = JSHandle::Cast(regexp)->GetJSHClass(); - JSHClass *originHClass = JSHClass::Cast(thread->GlobalConstants()->GetJSRegExpClass().GetTaggedObject()); - if (hclass == originHClass) { - // 7. Return RegExpBuiltinExec(R, S). - return RegExpBuiltinExec(thread, regexp, inputString, isCached); - } - JSHandle obj = JSHandle::Cast(thisObj); InternalCallParams *arguments = thread->GetInternalCallParams(); arguments->MakeArgv(inputStr.GetTaggedValue()); - JSTaggedValue result = JSFunction::Call(thread, exec, obj, 1, arguments->GetArgv()); + JSTaggedValue result = JSFunction::Call(thread, exec, regexp, 1, arguments->GetArgv()); // b. ReturnIfAbrupt(result). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); if (!result.IsECMAObject() && !result.IsNull()) { @@ -1413,12 +1583,12 @@ JSTaggedValue BuiltinsRegExp::RegExpExec(JSThread *thread, const JSHandleIsJSRegExp()) { + if (!regexp->IsJSRegExp()) { // throw a TypeError exception. THROW_TYPE_ERROR_AND_RETURN(thread, "this does not have a [[RegExpMatcher]]", JSTaggedValue::Exception()); } // 7. Return RegExpBuiltinExec(R, S). - return RegExpBuiltinExec(thread, regexp, inputString, isCached); + return RegExpBuiltinExec(thread, regexp, inputString, useCache); } // 21.2.3.2.1 @@ -1465,14 +1635,14 @@ uint32_t BuiltinsRegExp::UpdateExpressionFlags(JSThread *thread, const PandaStri default: { ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); JSHandle syntaxError = - factory->GetJSError(ecmascript::base::ErrorType::SYNTAX_ERROR, "invalid regular expression flags"); + factory->GetJSError(base::ErrorType::SYNTAX_ERROR, "invalid regular expression flags"); THROW_NEW_ERROR_AND_RETURN_VALUE(thread, syntaxError.GetTaggedValue(), 0); } } if ((flagsBits & flagsBitsTemp) != 0) { ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); JSHandle syntaxError = - factory->GetJSError(ecmascript::base::ErrorType::SYNTAX_ERROR, "invalid regular expression flags"); + factory->GetJSError(base::ErrorType::SYNTAX_ERROR, "invalid regular expression flags"); THROW_NEW_ERROR_AND_RETURN_VALUE(thread, syntaxError.GetTaggedValue(), 0); } flagsBits |= flagsBitsTemp; @@ -1484,42 +1654,35 @@ JSTaggedValue BuiltinsRegExp::FlagsBitsToString(JSThread *thread, uint8_t flags) { ASSERT((flags & static_cast(0xC0)) == 0); // 0xC0: first 2 bits of flags must be 0 - auto *flagsStr = new uint8_t[7]; // 7: maximum 6 flags + '\0' + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) + std::array flagsStr; // 7: maximum 6 flags + '\0' size_t flagsLen = 0; - if ((flags & RegExpParser::FLAG_GLOBAL) != 0U) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + if ((flags & RegExpParser::FLAG_GLOBAL) != 0) { flagsStr[flagsLen] = 'g'; flagsLen++; } - if ((flags & RegExpParser::FLAG_IGNORECASE) != 0U) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + if ((flags & RegExpParser::FLAG_IGNORECASE) != 0) { flagsStr[flagsLen] = 'i'; flagsLen++; } - if ((flags & RegExpParser::FLAG_MULTILINE) != 0U) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + if ((flags & RegExpParser::FLAG_MULTILINE) != 0) { flagsStr[flagsLen] = 'm'; flagsLen++; } - if ((flags & RegExpParser::FLAG_DOTALL) != 0U) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + if ((flags & RegExpParser::FLAG_DOTALL) != 0) { flagsStr[flagsLen] = 's'; flagsLen++; } - if ((flags & RegExpParser::FLAG_UTF16) != 0U) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + if ((flags & RegExpParser::FLAG_UTF16) != 0) { flagsStr[flagsLen] = 'u'; flagsLen++; } - if ((flags & RegExpParser::FLAG_STICKY) != 0U) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + if ((flags & RegExpParser::FLAG_STICKY) != 0) { flagsStr[flagsLen] = 'y'; flagsLen++; } - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) flagsStr[flagsLen] = '\0'; - JSHandle flagsString = thread->GetEcmaVM()->GetFactory()->NewFromUtf8(flagsStr, flagsLen); - delete[] flagsStr; + JSHandle flagsString = thread->GetEcmaVM()->GetFactory()->NewFromUtf8(flagsStr.data(), flagsLen); return flagsString.GetTaggedValue(); } @@ -1564,7 +1727,8 @@ JSTaggedValue BuiltinsRegExp::RegExpInitialize(JSThread *thread, const JSHandle< // 9. 10. RegExpParser parser = RegExpParser(); RegExpParserCache *regExpParserCache = thread->GetEcmaVM()->GetRegExpParserCache(); - auto getCache = regExpParserCache->GetCache(*patternStrHandle, flagsBits); + PandaVector groupName; + auto getCache = regExpParserCache->GetCache(*patternStrHandle, flagsBits, groupName); if (getCache.first == JSTaggedValue::Hole()) { parser.Init(const_cast(reinterpret_cast(patternStdStr.c_str())), patternStdStr.size(), flagsBits); @@ -1574,26 +1738,36 @@ JSTaggedValue BuiltinsRegExp::RegExpInitialize(JSThread *thread, const JSHandle< factory->GetJSError(base::ErrorType::SYNTAX_ERROR, parser.GetErrorMsg().c_str()); THROW_NEW_ERROR_AND_RETURN_VALUE(thread, syntaxError.GetTaggedValue(), JSTaggedValue::Exception()); } + groupName = parser.GetGroupNames(); } JSHandle regexp(thread, JSRegExp::Cast(obj->GetTaggedObject())); // 11. Set the value of obj’s [[OriginalSource]] internal slot to P. regexp->SetOriginalSource(thread, patternStrHandle.GetTaggedValue()); // 12. Set the value of obj’s [[OriginalFlags]] internal slot to F. regexp->SetOriginalFlags(thread, JSTaggedValue(flagsBits)); + if (!groupName.empty()) { + JSHandle taggedArray = factory->NewTaggedArray(groupName.size()); + for (size_t i = 0; i < groupName.size(); ++i) { + JSHandle flagsKey(factory->NewFromString(groupName[i])); + taggedArray->Set(thread, i, flagsKey); + } + regexp->SetGroupName(thread, taggedArray); + } // 13. Set obj’s [[RegExpMatcher]] internal slot. if (getCache.first == JSTaggedValue::Hole()) { auto bufferSize = parser.GetOriginBufferSize(); auto buffer = parser.GetOriginBuffer(); factory->NewJSRegExpByteCodeData(regexp, buffer, bufferSize); - regExpParserCache->SetCache(*patternStrHandle, flagsBits, regexp->GetByteCodeBuffer(), bufferSize); + regExpParserCache->SetCache(*patternStrHandle, flagsBits, regexp->GetByteCodeBuffer(), bufferSize, + std::move(groupName)); } else { regexp->SetByteCodeBuffer(thread, getCache.first); regexp->SetLength(thread, JSTaggedValue(static_cast(getCache.second))); } // 14. Let setStatus be Set(obj, "lastIndex", 0, true). JSHandle lastIndexString = thread->GlobalConstants()->GetHandledLastIndexString(); - FastRuntimeStub::FastSetProperty(thread, obj.GetTaggedValue(), lastIndexString.GetTaggedValue(), JSTaggedValue(0), - true); + FastRuntimeStub::FastSetPropertyByValue(thread, obj.GetTaggedValue(), lastIndexString.GetTaggedValue(), + JSTaggedValue(0)); // 15. ReturnIfAbrupt(setStatus). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); // 16. Return obj. @@ -1629,9 +1803,9 @@ EcmaString *BuiltinsRegExp::EscapeRegExpPattern(JSThread *thread, const JSHandle srcStdStr = "(?:)"; } // "/" -> "\/" - srcStdStr = ecmascript::base::StringHelper::RepalceAll(srcStdStr, "/", "\\/"); + srcStdStr = base::StringHelper::RepalceAll(srcStdStr, "/", "\\/"); // "\\" -> "\" - srcStdStr = ecmascript::base::StringHelper::RepalceAll(srcStdStr, "\\", "\\"); + srcStdStr = base::StringHelper::RepalceAll(srcStdStr, "\\", "\\"); return *factory->NewFromString(srcStdStr); } @@ -1654,7 +1828,7 @@ JSTaggedValue RegExpExecResultCache::CreateCacheTable(JSThread *thread) JSTaggedValue RegExpExecResultCache::FindCachedResult(JSThread *thread, const JSHandle &pattern, const JSHandle &flags, const JSHandle &input, CacheType type, - const JSHandle ®exp) + const JSHandle ®exp, JSTaggedValue extend) { JSTaggedValue patternValue = pattern.GetTaggedValue(); JSTaggedValue flagsValue = flags.GetTaggedValue(); @@ -1664,11 +1838,11 @@ JSTaggedValue RegExpExecResultCache::FindCachedResult(JSThread *thread, const JS return JSTaggedValue::Undefined(); } - uint32_t hash = pattern->GetKeyHashCode() + flags->GetInt() + input->GetKeyHashCode(); - uint32_t entry = hash & static_cast((GetCacheLength() - 1)); - if (!Match(entry, patternValue, flagsValue, inputValue)) { - uint32_t entry2 = (entry + 1) & static_cast((GetCacheLength() - 1)); - if (!Match(entry2, patternValue, flagsValue, inputValue)) { + uint32_t hash = pattern->GetKeyHashCode() + static_cast(flags->GetInt()) + input->GetKeyHashCode(); + uint32_t entry = hash & static_cast(GetCacheLength() - 1); + if (!Match(entry, patternValue, flagsValue, inputValue, extend)) { + uint32_t entry2 = (entry + 1) & static_cast(GetCacheLength() - 1); + if (!Match(entry2, patternValue, flagsValue, inputValue, extend)) { return JSTaggedValue::Undefined(); } entry = entry2; @@ -1703,7 +1877,7 @@ void RegExpExecResultCache::AddResultInCache(JSThread *thread, JSHandle &pattern, const JSHandle &flags, const JSHandle &input, const JSHandle &resultArray, CacheType type, - uint32_t lastIndex) + uint32_t lastIndex, JSTaggedValue extend) { if (!pattern->IsString() || !flags->IsInt() || !input->IsString()) { return; @@ -1714,18 +1888,20 @@ void RegExpExecResultCache::AddResultInCache(JSThread *thread, JSHandleGetKeyHashCode() + flags->GetInt() + input->GetKeyHashCode(); - uint32_t entry = hash & static_cast((cache->GetCacheLength() - 1)); + uint32_t hash = + patternValue.GetKeyHashCode() + static_cast(flagsValue.GetInt()) + inputValue.GetKeyHashCode(); + uint32_t entry = hash & static_cast(cache->GetCacheLength() - 1); uint32_t index = CACHE_TABLE_HEADER_SIZE + entry * ENTRY_SIZE; if (cache->Get(index) == JSTaggedValue::Undefined()) { cache->SetCacheCount(thread, cache->GetCacheCount() + 1); - cache->SetEntry(thread, entry, patternValue, flagsValue, inputValue, lastIndexValue); + cache->SetEntry(thread, entry, patternValue, flagsValue, inputValue, lastIndexValue, extend); cache->UpdateResultArray(thread, entry, resultArray.GetTaggedValue(), type); - } else if (cache->Match(entry, patternValue, flagsValue, inputValue)) { + } else if (cache->Match(entry, patternValue, flagsValue, inputValue, extend)) { cache->UpdateResultArray(thread, entry, resultArray.GetTaggedValue(), type); } else { - uint32_t entry2 = (entry + 1) & static_cast((cache->GetCacheLength() - 1)); + uint32_t entry2 = (entry + 1) & static_cast(cache->GetCacheLength() - 1); uint32_t index2 = CACHE_TABLE_HEADER_SIZE + entry2 * ENTRY_SIZE; + JSHandle extendHandle(thread, extend); if (cache->GetCacheLength() < DEFAULT_CACHE_NUMBER) { GrowRegexpCache(thread, cache); // update value after gc. @@ -1734,20 +1910,21 @@ void RegExpExecResultCache::AddResultInCache(JSThread *thread, JSHandleSetCacheLength(thread, DEFAULT_CACHE_NUMBER); - entry2 = hash & static_cast((cache->GetCacheLength() - 1)); + entry2 = hash & static_cast(cache->GetCacheLength() - 1); index2 = CACHE_TABLE_HEADER_SIZE + entry2 * ENTRY_SIZE; } + JSTaggedValue extendValue = extendHandle.GetTaggedValue(); if (cache->Get(index2) == JSTaggedValue::Undefined()) { cache->SetCacheCount(thread, cache->GetCacheCount() + 1); - cache->SetEntry(thread, entry2, patternValue, flagsValue, inputValue, lastIndexValue); + cache->SetEntry(thread, entry2, patternValue, flagsValue, inputValue, lastIndexValue, extendValue); cache->UpdateResultArray(thread, entry2, resultArray.GetTaggedValue(), type); - } else if (cache->Match(entry2, patternValue, flagsValue, inputValue)) { + } else if (cache->Match(entry2, patternValue, flagsValue, inputValue, extendValue)) { cache->UpdateResultArray(thread, entry2, resultArray.GetTaggedValue(), type); } else { cache->SetConflictCount(thread, cache->GetConflictCount() > 1 ? (cache->GetConflictCount() - 1) : 0); cache->SetCacheCount(thread, cache->GetCacheCount() - 1); cache->ClearEntry(thread, entry2); - cache->SetEntry(thread, entry, patternValue, flagsValue, inputValue, lastIndexValue); + cache->SetEntry(thread, entry, patternValue, flagsValue, inputValue, lastIndexValue, extendValue); cache->UpdateResultArray(thread, entry, resultArray.GetTaggedValue(), type); } } @@ -1762,13 +1939,14 @@ void RegExpExecResultCache::GrowRegexpCache(JSThread *thread, JSHandle(flags.GetInt()); + uint8_t flagsBits = flags.GetInt(); EcmaString *inputStr = EcmaString::Cast(input.GetTaggedObject()); EcmaString *keyPatternStr = EcmaString::Cast(keyPattern.GetTaggedObject()); - auto keyFlagsBits = static_cast(keyFlags.GetInt()); + uint8_t keyFlagsBits = keyFlags.GetInt(); EcmaString *keyInputStr = EcmaString::Cast(keyInput.GetTaggedObject()); + bool extendEqual = false; + if (extend.IsString() && keyExtend.IsString()) { + EcmaString *extendStr = EcmaString::Cast(extend.GetTaggedObject()); + EcmaString *keyExtendStr = EcmaString::Cast(keyExtend.GetTaggedObject()); + extendEqual = EcmaString::StringsAreEqual(extendStr, keyExtendStr); + } else if (extend.IsUndefined() && keyExtend.IsUndefined()) { + extendEqual = true; + } else { + return false; + } return EcmaString::StringsAreEqual(patternStr, keyPatternStr) && flagsBits == keyFlagsBits && - EcmaString::StringsAreEqual(inputStr, keyInputStr); + EcmaString::StringsAreEqual(inputStr, keyInputStr) && extendEqual; } } // namespace panda::ecmascript::builtins diff --git a/runtime/builtins/builtins_regexp.h b/runtime/builtins/builtins_regexp.h index 776b69ac1812219275260f9f1939f2e7bb0c33a5..4d4f8569e37199cb3bc3d47fb679c1b4fc558578 100644 --- a/runtime/builtins/builtins_regexp.h +++ b/runtime/builtins/builtins_regexp.h @@ -55,6 +55,8 @@ public: static JSTaggedValue GetSpecies(EcmaRuntimeCallInfo *argv); // 21.2.5.6 RegExp.prototype [ @@match ] ( string ) static JSTaggedValue Match(EcmaRuntimeCallInfo *argv); + // 22.2.5.8 RegExp.prototype [ @@matchAll ] ( string ) + static JSTaggedValue MatchAll(EcmaRuntimeCallInfo *argv); // 21.2.5.8 RegExp.prototype [ @@replace ] ( string, replaceValue ) static JSTaggedValue Replace(EcmaRuntimeCallInfo *argv); // 21.2.5.9 RegExp.prototype [ @@search ] ( string ) @@ -65,6 +67,11 @@ public: static JSTaggedValue RegExpCreate(JSThread *thread, const JSHandle &pattern, const JSHandle &flags); static JSTaggedValue FlagsBitsToString(JSThread *thread, uint8_t flags); + // 21.2.5.2.1 Runtime Semantics: RegExpExec ( R, S ) + static JSTaggedValue RegExpExec(JSThread *thread, const JSHandle ®exp, + const JSHandle &inputString, bool useCache); + // 21.2.5.2.3 AdvanceStringIndex ( S, index, unicode ) + static uint32_t AdvanceStringIndex(const JSHandle &inputStr, uint32_t index, bool unicode); private: static constexpr uint32_t MIN_REPLACE_STRING_LENGTH = 1000; @@ -72,17 +79,12 @@ private: static RegExpExecutor::MatchResult Matcher(JSThread *thread, const JSHandle ®exp, const uint8_t *buffer, size_t length, int32_t lastindex, bool isUtf16); - // 21.2.5.2.3 AdvanceStringIndex ( S, index, unicode ) - static uint32_t AdvanceStringIndex(JSThread *thread, const JSHandle &inputStr, uint32_t index, - bool unicode); static bool GetFlagsInternal(JSThread *thread, const JSHandle &obj, uint8_t mask); // 21.2.5.2.2 Runtime Semantics: RegExpBuiltinExec ( R, S ) static JSTaggedValue RegExpBuiltinExec(JSThread *thread, const JSHandle ®exp, - const JSHandle &inputStr, bool isCached); - // 21.2.5.2.1 Runtime Semantics: RegExpExec ( R, S ) - static JSTaggedValue RegExpExec(JSThread *thread, const JSHandle ®exp, - const JSHandle &inputString, bool isCached); + const JSHandle &inputStr, bool useCache); + // 21.2.3.2.1 Runtime Semantics: RegExpAlloc ( newTarget ) static JSTaggedValue RegExpAlloc(JSThread *thread, const JSHandle &newTarget); @@ -106,21 +108,25 @@ public: return reinterpret_cast(object); } static JSTaggedValue CreateCacheTable(JSThread *thread); + // extend as an additional parameter to judge cached JSTaggedValue FindCachedResult(JSThread *thread, const JSHandle &pattern, const JSHandle &flags, const JSHandle &input, - CacheType type, const JSHandle ®exp); + CacheType type, const JSHandle ®exp, + JSTaggedValue extend = JSTaggedValue::Undefined()); + // extend as an additional parameter to judge cached static void AddResultInCache(JSThread *thread, JSHandle cache, const JSHandle &pattern, const JSHandle &flags, const JSHandle &input, const JSHandle &resultArray, - CacheType type, uint32_t lastIndex); + CacheType type, uint32_t lastIndex, JSTaggedValue extend = JSTaggedValue::Undefined()); static void GrowRegexpCache(JSThread *thread, JSHandle cache); void ClearEntry(JSThread *thread, int entry); void SetEntry(JSThread *thread, int entry, JSTaggedValue &pattern, JSTaggedValue &flags, JSTaggedValue &input, - JSTaggedValue &lastIndexValue); + JSTaggedValue &lastIndexValue, JSTaggedValue &extendValue); void UpdateResultArray(JSThread *thread, int entry, JSTaggedValue resultArray, CacheType type); - bool Match(int entry, JSTaggedValue &pattern, JSTaggedValue &flagsStr, JSTaggedValue &inputStr); + bool Match(int entry, JSTaggedValue &patternStr, JSTaggedValue &flagsStr, JSTaggedValue &inputStr, + JSTaggedValue &extend); inline void SetHitCount(JSThread *thread, int hitCount) { Set(thread, CACHE_HIT_COUNT_INDEX, JSTaggedValue(hitCount)); @@ -207,7 +213,9 @@ private: static constexpr int RESULT_SPLIT_INDEX = 5; static constexpr int RESULT_MATCH_INDEX = 6; static constexpr int RESULT_EXEC_INDEX = 7; - static constexpr int ENTRY_SIZE = 8; + // Extend index used for saving an additional parameter to judge cached + static constexpr int EXTEND_INDEX = 8; + static constexpr int ENTRY_SIZE = 9; }; } // namespace panda::ecmascript::builtins #endif // ECMASCRIPT_BUILTINS_BUILTINS_REGEXP_H diff --git a/runtime/builtins/builtins_string.cpp b/runtime/builtins/builtins_string.cpp index 35c001f6aa79c2bef12c66bf8c3a131a42d2402e..4f81de1daed29a59044017049651491b006cf21b 100644 --- a/runtime/builtins/builtins_string.cpp +++ b/runtime/builtins/builtins_string.cpp @@ -34,6 +34,7 @@ #include "plugins/ecmascript/runtime/js_locale.h" #include "plugins/ecmascript/runtime/js_object-inl.h" #include "plugins/ecmascript/runtime/js_primitive_ref.h" +#include "plugins/ecmascript/runtime/js_regexp.h" #include "plugins/ecmascript/runtime/js_string_iterator.h" #include "plugins/ecmascript/runtime/js_tagged_value-inl.h" #include "plugins/ecmascript/runtime/object_factory.h" @@ -478,45 +479,23 @@ JSTaggedValue BuiltinsString::IndexOf(EcmaRuntimeCallInfo *argv) JSHandle thisTag(JSTaggedValue::RequireObjectCoercible(thread, GetThis(argv))); JSHandle thisHandle = JSTaggedValue::ToString(thread, thisTag); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); - int32_t thisLen = thisHandle->GetLength(); + uint32_t thisLen = thisHandle->GetLength(); JSHandle searchHandle = JSTaggedValue::ToString(thread, searchTag); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); - int32_t searchLen = searchHandle->GetLength(); + JSHandle posTag = BuiltinsString::GetCallArg(argv, 1); int32_t pos; - if (argv->GetArgsNumber() == 1) { + if (posTag->IsInt()) { + pos = posTag->GetInt(); + } else if (posTag->IsUndefined()) { pos = 0; } else { - JSHandle posTag = BuiltinsString::GetCallArg(argv, 1); JSTaggedNumber posVal = JSTaggedValue::ToInteger(thread, posTag); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); pos = posVal.ToInt32(); } - pos = std::min(std::max(pos, 0), thisLen); - if (thisHandle->IsUtf8() && searchHandle->IsUtf8()) { - std::string thisString = base::StringHelper::Utf8ToString(thisHandle->GetDataUtf8(), thisLen); - std::string searchString = base::StringHelper::Utf8ToString(searchHandle->GetDataUtf8(), searchLen); - int32_t res = base::StringHelper::Find(thisString, searchString, pos); - if (res >= 0 && res < thisLen) { - return GetTaggedInt(res); - } - return GetTaggedInt(-1); - } - std::u16string u16strThis; - std::u16string u16strSearch; - if (thisHandle->IsUtf16()) { - u16strThis = ecmascript::base::StringHelper::Utf16ToU16String(thisHandle->GetDataUtf16(), thisLen); - } else { - const uint8_t *uint8This = thisHandle->GetDataUtf8(); - u16strThis = ecmascript::base::StringHelper::Utf8ToU16String(uint8This, thisLen); - } - if (searchHandle->IsUtf16()) { - u16strSearch = ecmascript::base::StringHelper::Utf16ToU16String(searchHandle->GetDataUtf16(), searchLen); - } else { - const uint8_t *uint8Search = searchHandle->GetDataUtf8(); - u16strSearch = ecmascript::base::StringHelper::Utf8ToU16String(uint8Search, searchLen); - } - int32_t res = ecmascript::base::StringHelper::Find(u16strThis, u16strSearch, pos); - if (res >= 0 && res < thisLen) { + pos = std::min(std::max(pos, 0), static_cast(thisLen)); + int32_t res = thisHandle->IndexOf(*searchHandle, pos); + if (res >= 0 && res < static_cast(thisLen)) { return GetTaggedInt(res); } return GetTaggedInt(-1); @@ -601,6 +580,17 @@ JSTaggedValue BuiltinsString::Match(EcmaRuntimeCallInfo *argv) JSHandle thisTag(JSTaggedValue::RequireObjectCoercible(thread, GetThis(argv))); JSHandle regexp = BuiltinsString::GetCallArg(argv, 0); JSHandle matchTag = thread->GetEcmaVM()->GetGlobalEnv()->GetMatchSymbol(); + if (regexp->IsJSRegExp()) { + JSHandle cacheTable(thread->GetEcmaVM()->GetRegExpCache()); + JSHandle re(regexp); + JSHandle pattern(thread, re->GetOriginalSource()); + JSHandle flags(thread, re->GetOriginalFlags()); + JSTaggedValue cacheResult = + cacheTable->FindCachedResult(thread, pattern, flags, thisTag, RegExpExecResultCache::MATCH_TYPE, regexp); + if (cacheResult != JSTaggedValue::Undefined()) { + return cacheResult; + } + } if (!regexp->IsUndefined() && !regexp->IsNull()) { if (regexp->IsECMAObject()) { JSHandle matcher = JSObject::GetMethod(thread, regexp, matchTag); @@ -623,6 +613,67 @@ JSTaggedValue BuiltinsString::Match(EcmaRuntimeCallInfo *argv) return JSFunction::Invoke(thread, rx, matchTag, 1, arguments->GetArgv()); } +JSTaggedValue BuiltinsString::MatchAll(EcmaRuntimeCallInfo *argv) +{ + ASSERT(argv); + BUILTINS_API_TRACE(argv->GetThread(), String, MatchAll); + JSThread *thread = argv->GetThread(); + [[maybe_unused]] EcmaHandleScope handleScope(thread); + const GlobalEnvConstants *globalConst = thread->GlobalConstants(); + // 1. Let O be ? RequireObjectCoercible(this value). + JSHandle thisTag(JSTaggedValue::RequireObjectCoercible(thread, GetThis(argv))); + JSHandle regexp = BuiltinsString::GetCallArg(argv, 0); + JSHandle matchAllTag = thread->GetEcmaVM()->GetGlobalEnv()->GetMatchAllSymbol(); + JSHandle gvalue(globalConst->GetHandledGString()); + + // 2. If regexp is neither undefined nor null, then + if (!regexp->IsUndefined() && !regexp->IsNull()) { + // a. Let isRegExp be ? IsRegExp(searchValue). + bool isJSRegExp = JSObject::IsRegExp(thread, regexp); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + // b. If isRegExp is true, then + if (isJSRegExp) { + // i. Let flags be ? Get(searchValue, "flags"). + JSHandle flagsString(globalConst->GetHandledFlagsString()); + JSHandle flags = JSObject::GetProperty(thread, regexp, flagsString).GetValue(); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + // ii. Perform ? RequireObjectCoercible(flags). + JSTaggedValue::RequireObjectCoercible(thread, flags); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + // iii. If ? ToString(flags) does not contain "g", throw a TypeError exception. + JSHandle flagString = JSTaggedValue::ToString(thread, flags); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + int32_t pos = flagString->IndexOf(static_cast(gvalue->GetTaggedObject())); + if (pos == -1) { + THROW_TYPE_ERROR_AND_RETURN(thread, "matchAll called with a non-global RegExp argument", + JSTaggedValue::Exception()); + } + } + + if (regexp->IsECMAObject()) { + // c. c. Let matcher be ? GetMethod(regexp, @@matchAll). + // d. d. If matcher is not undefined, then + JSHandle matcher = JSObject::GetMethod(thread, regexp, matchAllTag); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + if (!matcher->IsUndefined()) { + ASSERT(matcher->IsJSFunction()); + // i. i. Return ? Call(matcher, regexp, « O »). + InternalCallParams *arguments = thread->GetInternalCallParams(); + arguments->MakeArgv(thisTag.GetTaggedValue()); + return JSFunction::Call(thread, matcher, regexp, 1, arguments->GetArgv()); + } + } + } + // 3. Let S be ? ToString(O). + JSHandle thisVal = JSTaggedValue::ToString(thread, thisTag); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + // 4. Let rx be ? RegExpCreate(regexp, "g"). + JSHandle rx(thread, BuiltinsRegExp::RegExpCreate(thread, regexp, gvalue)); + InternalCallParams *arguments = thread->GetInternalCallParams(); + arguments->MakeArgv(thisVal.GetTaggedValue()); + return JSFunction::Invoke(thread, rx, matchAllTag, 1, arguments->GetArgv()); +} + // 21.1.3.12 JSTaggedValue BuiltinsString::Normalize(EcmaRuntimeCallInfo *argv) { @@ -768,6 +819,19 @@ JSTaggedValue BuiltinsString::Replace(EcmaRuntimeCallInfo *argv) ObjectFactory *factory = ecmaVm->GetFactory(); + if (searchTag->IsJSRegExp() && replaceTag->IsString()) { + JSHandle cacheTable(thread->GetEcmaVM()->GetRegExpCache()); + JSHandle re(searchTag); + JSHandle pattern(thread, re->GetOriginalSource()); + JSHandle flags(thread, re->GetOriginalFlags()); + JSTaggedValue cacheResult = + cacheTable->FindCachedResult(thread, pattern, flags, thisTag, RegExpExecResultCache::REPLACE_TYPE, + searchTag, replaceTag.GetTaggedValue()); + if (cacheResult != JSTaggedValue::Undefined()) { + return cacheResult; + } + } + // If searchValue is neither undefined nor null, then if (searchTag->IsECMAObject()) { JSHandle replaceKey = env->GetReplaceSymbol(); @@ -819,12 +883,13 @@ JSTaggedValue BuiltinsString::Replace(EcmaRuntimeCallInfo *argv) arguments->GetArgv()); // 3: «matched, pos, and string» replHandle.Update(replStrDeocodeValue); } else { + JSHandle undefined = globalConst->GetHandledUndefined(); // Let captures be an empty List. - JSHandle capturesList = factory->NewTaggedArray(0); + JSHandle capturesList = factory->EmptyArray(); ASSERT_PRINT(replaceTag->IsString(), "replace must be string"); JSHandle replacement(thread, replaceTag->GetTaggedObject()); // Let replStr be GetSubstitution(matched, string, pos, captures, replaceValue) - replHandle.Update(GetSubstitution(thread, searchString, thisString, pos, capturesList, replacement)); + replHandle.Update(GetSubstitution(thread, searchString, thisString, pos, capturesList, undefined, replacement)); } JSHandle realReplaceStr = JSTaggedValue::ToString(thread, replHandle); // Let tailPos be pos + the number of code units in matched. @@ -836,38 +901,8 @@ JSTaggedValue BuiltinsString::Replace(EcmaRuntimeCallInfo *argv) JSHandle prefixString(thread, EcmaString::FastSubString(thisString, 0, pos, ecmaVm)); JSHandle suffixString( thread, EcmaString::FastSubString(thisString, tailPos, thisString->GetLength() - tailPos, ecmaVm)); - std::u16string stringBuilder; - bool canBeCompress = true; - if (prefixString->IsUtf16()) { - const uint16_t *data = prefixString->GetDataUtf16(); - stringBuilder += ecmascript::base::StringHelper::Utf16ToU16String(data, prefixString->GetLength()); - canBeCompress = false; - } else { - const uint8_t *data = prefixString->GetDataUtf8(); - stringBuilder += ecmascript::base::StringHelper::Utf8ToU16String(data, prefixString->GetLength()); - } - - if (realReplaceStr->IsUtf16()) { - const uint16_t *data = realReplaceStr->GetDataUtf16(); - stringBuilder += ecmascript::base::StringHelper::Utf16ToU16String(data, realReplaceStr->GetLength()); - canBeCompress = false; - } else { - const uint8_t *data = realReplaceStr->GetDataUtf8(); - stringBuilder += ecmascript::base::StringHelper::Utf8ToU16String(data, realReplaceStr->GetLength()); - } - - if (suffixString->IsUtf16()) { - const uint16_t *data = suffixString->GetDataUtf16(); - stringBuilder += ecmascript::base::StringHelper::Utf16ToU16String(data, suffixString->GetLength()); - canBeCompress = false; - } else { - const uint8_t *data = suffixString->GetDataUtf8(); - stringBuilder += ecmascript::base::StringHelper::Utf8ToU16String(data, suffixString->GetLength()); - } - - auto *char16tData = const_cast(stringBuilder.c_str()); - auto *uint16tData = reinterpret_cast(char16tData); - return factory->NewFromUtf16LiteralUnCheck(uint16tData, stringBuilder.size(), canBeCompress).GetTaggedValue(); + JSHandle tempString(thread, EcmaString::Concat(prefixString, realReplaceStr, ecmaVm)); + return JSTaggedValue(EcmaString::Concat(tempString, suffixString, ecmaVm)); } // ES2021 22.1.3.18 @@ -875,241 +910,183 @@ JSTaggedValue BuiltinsString::Replace(EcmaRuntimeCallInfo *argv) JSTaggedValue BuiltinsString::ReplaceAll(EcmaRuntimeCallInfo *argv) { ASSERT(argv); - BUILTINS_API_TRACE(argv->GetThread(), String, ReplaceAll); JSThread *thread = argv->GetThread(); + BUILTINS_API_TRACE(thread, String, ReplaceAll); [[maybe_unused]] EcmaHandleScope handleScope(thread); - // 1. Let O be ? RequireObjectCoercible(this value). JSHandle thisTag = JSTaggedValue::RequireObjectCoercible(thread, BuiltinsString::GetThis(argv)); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); auto ecmaVm = thread->GetEcmaVM(); - ObjectFactory *factory = ecmaVm->GetFactory(); JSHandle env = ecmaVm->GetGlobalEnv(); + const GlobalEnvConstants *globalConst = thread->GlobalConstants(); + JSHandle searchTag = BuiltinsString::GetCallArg(argv, 0); + JSHandle replaceTag = BuiltinsString::GetCallArg(argv, 1); - JSHandle searchValue = BuiltinsString::GetCallArg(argv, 0); - JSHandle replaceValue = BuiltinsString::GetCallArg(argv, 1); + ObjectFactory *factory = ecmaVm->GetFactory(); - // 2. If searchValue is neither undefined nor null, then - if (searchValue->IsECMAObject()) { + if (!searchTag->IsUndefined() && !searchTag->IsNull()) { // a. Let isRegExp be ? IsRegExp(searchValue). - bool isRegExp = JSObject::IsRegExp(thread, searchValue); + bool isJSRegExp = JSObject::IsRegExp(thread, searchTag); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); - // b. If isRegExp is true, then - if (isRegExp) { + if (isJSRegExp) { // i. Let flags be ? Get(searchValue, "flags"). - JSHandle flagsString(factory->NewFromString("flags")); - JSHandle flags(JSObject::GetProperty(thread, searchValue, flagsString).GetValue()); + JSHandle flagsString(globalConst->GetHandledFlagsString()); + JSHandle flags = JSObject::GetProperty(thread, searchTag, flagsString).GetValue(); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); - // ii. Perform ? RequireObjectCoercible(flags). JSTaggedValue::RequireObjectCoercible(thread, flags); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); - // iii. If ? ToString(flags) does not contain "g", throw a TypeError exception. - JSHandle flags_str = JSTaggedValue::ToString(thread, flags); + JSHandle flagString = JSTaggedValue::ToString(thread, flags); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); - std::string flags_std_str = base::StringHelper::ToStdString(flags_str.GetObject()); - if (flags_std_str.find('g') == std::string::npos) { - THROW_TYPE_ERROR_AND_RETURN(thread, "replaceAll must be called with a global RegExp", + JSHandle gString(globalConst->GetHandledGString()); + int32_t pos = flagString->IndexOf(*gString); + if (pos == -1) { + THROW_TYPE_ERROR_AND_RETURN(thread, + "string.prototype.replaceAll called with a non-global RegExp argument", JSTaggedValue::Exception()); } } - // c. Let replacer be ? GetMethod(searchValue, @@replace). JSHandle replaceKey = env->GetReplaceSymbol(); - JSHandle replacer = JSObject::GetMethod(thread, searchValue, replaceKey); + JSHandle replaceMethod = JSObject::GetMethod(thread, searchTag, replaceKey); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); - // d. If replacer is not undefined, then - if (!replacer->IsUndefined()) { - // i. Return ? Call(replacer, searchValue, « O, replaceValue »). - InternalCallParams *arguments = thread->GetInternalCallParams(); // 2: « O, replaceValue » - arguments->MakeArgv(thisTag, replaceValue); - JSTaggedValue result = JSFunction::Call(thread, replacer, searchValue, 2U, arguments->GetArgv()); - RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); - return result; + if (!replaceMethod->IsUndefined()) { + // i. Return ? Call(replacer, searchValue, «O, replaceValue»). + InternalCallParams *arguments = thread->GetInternalCallParams(); + arguments->MakeArgv(thisTag.GetTaggedValue(), replaceTag.GetTaggedValue()); + return JSFunction::Call(thread, replaceMethod, searchTag, 2, arguments->GetArgv()); } } // 3. Let string be ? ToString(O). JSHandle thisString = JSTaggedValue::ToString(thread, thisTag); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); - // 4. Let searchString be ? ToString(searchValue). - JSHandle searchString = JSTaggedValue::ToString(thread, searchValue); + JSHandle searchString = JSTaggedValue::ToString(thread, searchTag); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); - // 5. Let functionalReplace be IsCallable(replaceValue). // 6. If functionalReplace is false, then - if (!replaceValue->IsCallable()) { + if (!replaceTag->IsCallable()) { // a. Set replaceValue to ? ToString(replaceValue). - replaceValue = JSHandle(JSTaggedValue::ToString(thread, replaceValue)); + replaceTag = JSHandle(JSTaggedValue::ToString(thread, replaceTag)); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); } // 7. Let searchLength be the length of searchString. - uint32_t searchLength = searchString->GetLength(); - // 8. Let advanceBy be max(1, searchLength). - uint32_t advanceBy = searchLength > 1 ? searchLength : 1; - + int32_t searchLength = searchString->GetLength(); + int32_t advanceBy = std::max(1, searchLength); // 9. Let matchPositions be a new empty List. - JSHandle matchPositions = factory->NewTaggedArray(thisString->GetLength() + 1); - + std::u16string stringBuilder; + std::u16string stringPrefixString; + std::u16string stringRealReplaceStr; + std::u16string stringSuffixString; // 10. Let position be ! StringIndexOf(string, searchString, 0). - int32_t position = thisString->IndexOf(*searchString); - if (position == -1) { - return thisString.GetTaggedValue(); - } - - // 11. Repeat, while position is not -1, - uint32_t index = 0; - while (position != -1) { - // a. Append position to the end of matchPositions. - matchPositions->Set(thread, index++, JSTaggedValue(position)); - - // b. Set position to ! StringIndexOf(string, searchString, position + advanceBy). - position = thisString->IndexOf(*searchString, position + static_cast(advanceBy)); - } - size_t num_matches = index; - - // 12. Let endOfLastMatch be 0. - uint32_t endOfLastMatch = 0; - - // 13. Let result be the empty String. - JSHandle result = factory->NewFromString(""); - JSMutableHandle preserved(thread, JSTaggedValue::Undefined()); - - // 14. For each element p of matchPositions, do - for (size_t pos = 0; pos < num_matches; pos++) { - index = matchPositions->Get(pos).GetInt(); - // a. Let preserved be the substring of string from endOfLastMatch to p. - preserved.Update( - JSTaggedValue(EcmaString::FastSubString(thisString, endOfLastMatch, index - endOfLastMatch, ecmaVm))); - - // b. If functionalReplace is true, then - JSTaggedValue replacement; - if (replaceValue->IsCallable()) { - // i. Let replacement be ? ToString(? Call(replaceValue, undefined, « searchString, F(p), string »)). - + int32_t pos = thisString->IndexOf(*searchString); + int32_t endOfLastMatch = 0; + bool canBeCompress = true; + JSHandle undefined = globalConst->GetHandledUndefined(); + JSMutableHandle replHandle(thread, factory->GetEmptyString().GetTaggedValue()); + while (pos != -1) { + // If functionalReplace is true, then + if (replaceTag->IsCallable()) { + // Let replValue be Call(replaceValue, undefined, «matched, pos, and string»). InternalCallParams *arguments = thread->GetInternalCallParams(); - arguments->MakeArgv(searchString.GetTaggedValue(), JSTaggedValue(index), - thisString.GetTaggedValue()); // 3: « searchString, F(p), string » - JSTaggedValue result = - JSFunction::Call(thread, replaceValue, JSHandle(thread, JSTaggedValue::Undefined()), 3U, - arguments->GetArgv()); - - RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); - - replacement = JSTaggedValue::ToString(thread, JSHandle(thread, result)).GetTaggedValue(); - RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); - } else { - // i. Assert: Type(replaceValue) is String. - ASSERT(replaceValue->IsString()); - - // ii. Let captures be a new empty List. - JSHandle captures = factory->NewTaggedArray(0); - - // iii. Let replacement be ! GetSubstitution(searchString, string, p, captures, undefined, replaceValue). - replacement = - GetSubstitution(thread, searchString, thisString, index, captures, JSHandle(replaceValue)); - } - - // d. Set result to the string-concatenation of result, preserved, and replacement. - std::u16string stringBuilder; - if (result->IsUtf16()) { - const uint16_t *data = result->GetDataUtf16(); - stringBuilder += ecmascript::base::StringHelper::Utf16ToU16String(data, result->GetLength()); + arguments->MakeArgv(searchString.GetTaggedValue(), JSTaggedValue(pos), thisString.GetTaggedValue()); + JSTaggedValue replStrDeocodeValue = + JSFunction::Call(thread, replaceTag, undefined, 3U, arguments->GetArgv()); + replHandle.Update(replStrDeocodeValue); } else { - const uint8_t *data = result->GetDataUtf8(); - stringBuilder += ecmascript::base::StringHelper::Utf8ToU16String(data, result->GetLength()); + // Let captures be an empty List. + JSHandle capturesList = factory->NewTaggedArray(0); + ASSERT_PRINT(replaceTag->IsString(), "replace must be string"); + JSHandle replacement(thread, replaceTag->GetTaggedObject()); + // Let replStr be GetSubstitution(matched, string, pos, captures, replaceValue) + replHandle.Update( + GetSubstitution(thread, searchString, thisString, pos, capturesList, undefined, replacement)); } - - if (preserved->IsUtf16()) { - const uint16_t *data = preserved->GetDataUtf16(); - stringBuilder += ecmascript::base::StringHelper::Utf16ToU16String(data, preserved->GetLength()); - } else { - const uint8_t *data = preserved->GetDataUtf8(); - stringBuilder += ecmascript::base::StringHelper::Utf8ToU16String(data, preserved->GetLength()); - } - - JSHandle replacementHandle = JSHandle(thread, replacement); - if (replacementHandle->IsUtf16()) { - const uint16_t *data = replacementHandle->GetDataUtf16(); - stringBuilder += ecmascript::base::StringHelper::Utf16ToU16String(data, replacementHandle->GetLength()); + JSHandle realReplaceStr = JSTaggedValue::ToString(thread, replHandle); + // Let tailPos be pos + the number of code units in matched. + // Let newString be the String formed by concatenating the first pos code units of string, + // replStr, and the trailing substring of string starting at index tailPos. + // If pos is 0, the first element of the concatenation will be the + // empty String. + // Return newString. + JSHandle prefixString( + thread, EcmaString::FastSubString(thisString, endOfLastMatch, pos - endOfLastMatch, ecmaVm)); + if (prefixString->IsUtf16()) { + const uint16_t *data = prefixString->GetDataUtf16(); + stringPrefixString = base::StringHelper::Utf16ToU16String(data, prefixString->GetLength()); + canBeCompress = false; } else { - const uint8_t *data = replacementHandle->GetDataUtf8(); - stringBuilder += ecmascript::base::StringHelper::Utf8ToU16String(data, replacementHandle->GetLength()); + const uint8_t *data = prefixString->GetDataUtf8(); + stringPrefixString = base::StringHelper::Utf8ToU16String(data, prefixString->GetLength()); } - - auto *char16tData = const_cast(stringBuilder.c_str()); - auto *uint16tData = reinterpret_cast(char16tData); - result = factory->NewFromUtf16Literal(uint16tData, stringBuilder.size()); - - // e. Set endOfLastMatch to p + searchLength. - endOfLastMatch = index + searchLength; - } - - // 15. If endOfLastMatch < the length of string, then - if (endOfLastMatch < thisString->GetLength()) { - // a. Set result to the string-concatenation of result and the substring of string from endOfLastMatch. - std::u16string stringBuilder; - if (result->IsUtf16()) { - const uint16_t *data = result->GetDataUtf16(); - stringBuilder += ecmascript::base::StringHelper::Utf16ToU16String(data, result->GetLength()); + if (realReplaceStr->IsUtf16()) { + const uint16_t *data = realReplaceStr->GetDataUtf16(); + stringRealReplaceStr = base::StringHelper::Utf16ToU16String(data, realReplaceStr->GetLength()); + canBeCompress = false; } else { - const uint8_t *data = result->GetDataUtf8(); - stringBuilder += ecmascript::base::StringHelper::Utf8ToU16String(data, result->GetLength()); + const uint8_t *data = realReplaceStr->GetDataUtf8(); + stringRealReplaceStr = base::StringHelper::Utf8ToU16String(data, realReplaceStr->GetLength()); } - - EcmaString *sub = - EcmaString::FastSubString(thisString, endOfLastMatch, thisString->GetLength() - endOfLastMatch, ecmaVm); - - if (sub->IsUtf16()) { - const uint16_t *data = sub->GetDataUtf16(); - stringBuilder += ecmascript::base::StringHelper::Utf16ToU16String(data, sub->GetLength()); + stringBuilder.append(stringPrefixString); + stringBuilder.append(stringRealReplaceStr); + endOfLastMatch = pos + searchLength; + pos = thisString->IndexOf(*searchString, pos + advanceBy); + } + + if (endOfLastMatch < static_cast(thisString->GetLength())) { + JSHandle suffixString( + thread, + EcmaString::FastSubString(thisString, endOfLastMatch, thisString->GetLength() - endOfLastMatch, ecmaVm)); + if (suffixString->IsUtf16()) { + const uint16_t *data = suffixString->GetDataUtf16(); + stringSuffixString = base::StringHelper::Utf16ToU16String(data, suffixString->GetLength()); + canBeCompress = false; } else { - const uint8_t *data = sub->GetDataUtf8(); - stringBuilder += ecmascript::base::StringHelper::Utf8ToU16String(data, sub->GetLength()); + const uint8_t *data = suffixString->GetDataUtf8(); + stringSuffixString = base::StringHelper::Utf8ToU16String(data, suffixString->GetLength()); } - - auto *char16tData = const_cast(stringBuilder.c_str()); - auto *uint16tData = reinterpret_cast(char16tData); - result = factory->NewFromUtf16Literal(uint16tData, stringBuilder.size()); + stringBuilder = stringBuilder + stringSuffixString; } - // 16. Return result. - return result.GetTaggedValue(); + auto *char16tData = const_cast(stringBuilder.c_str()); + auto *uint16tData = reinterpret_cast(char16tData); + return factory->NewFromUtf16LiteralUnCheck(uint16tData, stringBuilder.length(), canBeCompress).GetTaggedValue(); } +// NOLINTNEXTLINE(readability-function-size) JSTaggedValue BuiltinsString::GetSubstitution(JSThread *thread, const JSHandle &matched, const JSHandle &srcString, int position, const JSHandle &captureList, + const JSHandle &namedCaptures, const JSHandle &replacement) { BUILTINS_API_TRACE(thread, String, GetSubstitution); auto ecmaVm = thread->GetEcmaVM(); ObjectFactory *factory = ecmaVm->GetFactory(); - JSHandle dollarString = factory->NewFromCanBeCompressString("$"); + JSHandle dollarString = JSHandle::Cast(thread->GlobalConstants()->GetHandledDollarString()); int32_t replaceLength = replacement->GetLength(); - int32_t tailPos = position + matched->GetLength(); + int32_t tailPos = position + static_cast(matched->GetLength()); int32_t nextDollarIndex = replacement->IndexOf(*dollarString, 0); if (nextDollarIndex < 0) { return replacement.GetTaggedValue(); } - std::u16string stringBuilder; bool canBeCompress = true; if (nextDollarIndex > 0) { if (replacement->IsUtf16()) { const uint16_t *data = replacement->GetDataUtf16(); - stringBuilder += ecmascript::base::StringHelper::Utf16ToU16String(data, nextDollarIndex); + stringBuilder += base::StringHelper::Utf16ToU16String(data, nextDollarIndex); canBeCompress = false; } else { const uint8_t *data = replacement->GetDataUtf8(); - stringBuilder += ecmascript::base::StringHelper::Utf8ToU16String(data, nextDollarIndex); + stringBuilder += base::StringHelper::Utf8ToU16String(data, nextDollarIndex); } } @@ -1220,6 +1197,39 @@ JSTaggedValue BuiltinsString::GetSubstitution(JSThread *thread, const JSHandleIsUndefined()) { + stringBuilder += '$'; + continueFromIndex = peekIndex; + break; + } + JSHandle greaterSymString = factory->NewFromStdString(">"); + int pos = replacement->IndexOf(*greaterSymString, peekIndex); + if (pos == -1) { + stringBuilder += '$'; + continueFromIndex = peekIndex; + break; + } + JSHandle groupName( + thread, EcmaString::FastSubString(replacement, peekIndex + 1, pos - peekIndex - 1, ecmaVm)); + JSHandle names(groupName); + JSHandle capture = JSObject::GetProperty(thread, namedCaptures, names).GetValue(); + if (capture->IsUndefined()) { + continueFromIndex = pos + 1; + break; + } + JSHandle captureName(capture); + if (captureName->IsUtf16()) { + const uint16_t *data = captureName->GetDataUtf16(); + stringBuilder += base::StringHelper::Utf16ToU16String(data, captureName->GetLength()); + canBeCompress = false; + } else { + const uint8_t *data = captureName->GetDataUtf8(); + stringBuilder += base::StringHelper::Utf8ToU16String(data, captureName->GetLength()); + } + continueFromIndex = pos + 1; + break; + } default: stringBuilder += '$'; continueFromIndex = peekIndex; @@ -1376,7 +1386,9 @@ JSTaggedValue BuiltinsString::Split(EcmaRuntimeCallInfo *argv) if (limitTag->IsUndefined()) { lim = UINT32_MAX - 1; } else { - lim = JSTaggedValue::ToInteger(thread, limitTag).ToUint32(); + JSTaggedNumber limVal = JSTaggedValue::ToInteger(thread, limitTag); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + lim = limVal.ToUint32(); } // ReturnIfAbrupt(lim). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); @@ -1396,7 +1408,7 @@ JSTaggedValue BuiltinsString::Split(EcmaRuntimeCallInfo *argv) } // If S.length = 0, then if (thisLength == 0) { - if (SplitMatch(thisString, 0, seperatorString) != -1) { + if (thisString->IndexOf(*seperatorString, 0) != -1) { return resultArray.GetTaggedValue(); } JSObject::CreateDataProperty(thread, resultArray, 0, JSHandle(thisString)); @@ -1404,32 +1416,35 @@ JSTaggedValue BuiltinsString::Split(EcmaRuntimeCallInfo *argv) return resultArray.GetTaggedValue(); } - // Let q = p. - // Repeat, while q ≠ s - int32_t p = 0; - int32_t q = p; - while (q != thisLength) { - int32_t matchedIndex = SplitMatch(thisString, q, seperatorString); - if (matchedIndex == -1) { - q = q + 1; - } else { - if (matchedIndex == p) { - q = q + 1; - } else { - EcmaString *elementString = EcmaString::FastSubString(thisString, p, q - p, ecmaVm); - JSHandle elementTag(thread, elementString); - JSObject::CreateDataProperty(thread, resultArray, arrayLength, elementTag); - ASSERT_PRINT(!thread->HasPendingException(), "CreateDataProperty can't throw exception"); - ++arrayLength; - if (arrayLength == lim) { - return resultArray.GetTaggedValue(); - } - p = matchedIndex; - q = p; + int32_t seperatorLength = seperatorString->GetLength(); + if (seperatorLength == 0) { + for (int32_t i = 0; i < thisLength; ++i) { + EcmaString *elementString = EcmaString::FastSubString(thisString, i, 1, ecmaVm); + JSHandle elementTag(thread, elementString); + JSObject::CreateDataProperty(thread, resultArray, arrayLength, elementTag); + ASSERT_PRINT(!thread->HasPendingException(), "CreateDataProperty can't throw exception"); + ++arrayLength; + if (arrayLength == lim) { + return resultArray.GetTaggedValue(); } } + return resultArray.GetTaggedValue(); + } + int32_t index = 0; + int32_t pos = thisString->IndexOf(*seperatorString); + while (pos != -1) { + EcmaString *elementString = EcmaString::FastSubString(thisString, index, pos - index, ecmaVm); + JSHandle elementTag(thread, elementString); + JSObject::CreateDataProperty(thread, resultArray, arrayLength, elementTag); + ASSERT_PRINT(!thread->HasPendingException(), "CreateDataProperty can't throw exception"); + ++arrayLength; + if (arrayLength == lim) { + return resultArray.GetTaggedValue(); + } + index = pos + seperatorLength; + pos = thisString->IndexOf(*seperatorString, index); } - EcmaString *elementString = EcmaString::FastSubString(thisString, p, thisLength - p, ecmaVm); + EcmaString *elementString = EcmaString::FastSubString(thisString, index, thisLength - index, ecmaVm); JSHandle elementTag(thread, elementString); JSObject::CreateDataProperty(thread, resultArray, arrayLength, elementTag); ASSERT_PRINT(!thread->HasPendingException(), "CreateDataProperty can't throw exception"); diff --git a/runtime/builtins/builtins_string.h b/runtime/builtins/builtins_string.h index 7d3ac3e90537db1254d6083fd2a17c6026d01d3f..bb507165ca3e296efb0f0e0cf684a8432c3b8a85 100644 --- a/runtime/builtins/builtins_string.h +++ b/runtime/builtins/builtins_string.h @@ -43,6 +43,7 @@ public: static JSTaggedValue GetSubstitution(JSThread *thread, const JSHandle &matched, const JSHandle &srcString, int position, const JSHandle &captureList, + const JSHandle &namedCaptures, const JSHandle &replacement); // 21.1.3.1 static JSTaggedValue CharAt(EcmaRuntimeCallInfo *argv); @@ -65,6 +66,8 @@ public: static JSTaggedValue LocaleCompare(EcmaRuntimeCallInfo *argv); // 21.1.3.11 static JSTaggedValue Match(EcmaRuntimeCallInfo *argv); + + static JSTaggedValue MatchAll(EcmaRuntimeCallInfo *argv); // 21.1.3.12 static JSTaggedValue Normalize(EcmaRuntimeCallInfo *argv); // ES2021 22.1.3.14 diff --git a/runtime/dump.cpp b/runtime/dump.cpp index 5fe42692ffa2c7f3efa233b773c29fd2ca2096d8..ef3370fc05d1291cb779eee8d5565baf7553fc52 100644 --- a/runtime/dump.cpp +++ b/runtime/dump.cpp @@ -62,6 +62,7 @@ #include "plugins/ecmascript/runtime/js_relative_time_format.h" #include "plugins/ecmascript/runtime/js_set.h" #include "plugins/ecmascript/runtime/js_set_iterator.h" +#include "plugins/ecmascript/runtime/js_regexp_iterator.h" #include "plugins/ecmascript/runtime/js_string_iterator.h" #include "plugins/ecmascript/runtime/js_tagged_number.h" #include "plugins/ecmascript/runtime/js_tagged_value-inl.h" @@ -176,6 +177,8 @@ PandaString JSHClass::DumpJSType(JSType type) return "ArrayIterator"; case JSType::JS_STRING_ITERATOR: return "StringIterator"; + case JSType::JS_REG_EXP_ITERATOR: + return "RegExpIterator"; case JSType::JS_ARRAY_BUFFER: return "ArrayBuffer"; case JSType::JS_PROXY_REVOC_FUNCTION: @@ -546,6 +549,9 @@ static void DumpObject(JSThread *thread, TaggedObject *obj, std::ostream &os) case JSType::JS_SET_ITERATOR: JSSetIterator::Cast(obj)->Dump(thread, os); break; + case JSType::JS_REG_EXP_ITERATOR: + JSRegExpIterator::Cast(obj)->Dump(thread, os); + break; case JSType::JS_ARRAY_ITERATOR: JSArrayIterator::Cast(obj)->Dump(thread, os); break; @@ -1100,6 +1106,20 @@ void JSSetIterator::Dump(JSThread *thread, std::ostream &os) const set->Dump(thread, os); } +void JSRegExpIterator::Dump(JSThread *thread, std::ostream &os) const +{ + os << " - IteratingRegExp: "; + GetIteratingRegExp().D(); + os << "\n"; + os << " - IteratedString: "; + GetIteratedString().D(); + os << "\n"; + os << " - Global: " << std::dec << GetGlobal() << "\n"; + os << " - Unicode: " << std::dec << GetUnicode() << "\n"; + os << " - Done: " << std::dec << GetDone() << "\n"; + JSObject::Dump(thread, os); +} + void JSArray::Dump(JSThread *thread, std::ostream &os) const { os << " - length: " << std::dec << GetArrayLength() << "\n"; @@ -1306,6 +1326,8 @@ void GlobalEnv::Dump(JSThread *thread, std::ostream &os) const GetMapIteratorPrototype().GetTaggedValue().Dump(thread, os); os << " - SetIteratorPrototype: "; GetSetIteratorPrototype().GetTaggedValue().Dump(thread, os); + os << " - RegExpIteratorPrototype: "; + GetRegExpIteratorPrototype().GetTaggedValue().Dump(thread, os); os << " - ArrayIteratorPrototype: "; GetArrayIteratorPrototype().GetTaggedValue().Dump(thread, os); os << " - StringIteratorPrototype: "; @@ -2138,6 +2160,7 @@ static void DumpObject(JSThread *thread, TaggedObject *obj, std::vectorDumpForSnapshot(thread, vec); return; @@ -2687,6 +2710,8 @@ void GlobalEnv::DumpForSnapshot([[maybe_unused]] JSThread *thread, vec.emplace_back(std::make_pair(PandaString("StringIterator"), GetStringIterator().GetTaggedValue())); vec.emplace_back(std::make_pair(PandaString("MapIteratorPrototype"), GetMapIteratorPrototype().GetTaggedValue())); vec.emplace_back(std::make_pair(PandaString("SetIteratorPrototype"), GetSetIteratorPrototype().GetTaggedValue())); + vec.emplace_back( + std::make_pair(PandaString("RegExpIteratorPrototype"), GetRegExpIteratorPrototype().GetTaggedValue())); vec.emplace_back( std::make_pair(PandaString("ArrayIteratorPrototype"), GetArrayIteratorPrototype().GetTaggedValue())); vec.emplace_back( diff --git a/runtime/ecma_macros.h b/runtime/ecma_macros.h index 792c52e4fb49e5e4bea012668db8ac2472ef8243..fc362a62e9b276e733b651589928819e99bd4542 100644 --- a/runtime/ecma_macros.h +++ b/runtime/ecma_macros.h @@ -100,6 +100,47 @@ static inline void UnalignedStore(T *p, T v) ObjectAccessor::SetPrimitive(this, offset, value.GetRawData()); \ } +// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define DEFINE_ALIGN_SIZE(offset) \ + static constexpr size_t SIZE = ((offset) + sizeof(JSTaggedType) - 1U) & (~(sizeof(JSTaggedType) - 1U)) + +// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define ACCESSORS_BIT_FIELD(name, offset, endOffset) \ + static constexpr size_t endOffset = (offset) + sizeof(uint32_t); \ + inline void Set##name(uint32_t value) \ + { \ + ObjectAccessor::SetPrimitive(this, offset, value); \ + } \ + inline uint32_t Get##name() const \ + { \ + return ObjectAccessor::GetDynValue(this, offset); \ + } \ + inline void Clear##name() \ + { \ + Set##name(0UL); \ + } + +// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define SET_GET_BIT_FIELD(bitFieldName, name, type) \ + inline type Get##name() const \ + { \ + return name##Bits::Decode(Get##bitFieldName()); \ + } \ + inline void Set##name(type t) \ + { \ + Set##bitFieldName(name##Bits::Update(Get##bitFieldName(), t)); \ + } + +// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define FIRST_BIT_FIELD(bitFieldName, name, type, bits) \ + using name##Bits = BitField; \ + SET_GET_BIT_FIELD(bitFieldName, name, type) + +// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define NEXT_BIT_FIELD(bitFieldName, name, type, bits, lastName) \ + using name##Bits = lastName##Bits::NextField; \ + SET_GET_BIT_FIELD(bitFieldName, name, type) + // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define SET_GET_VOID_FIELD(name, offset, lastOffset) \ static constexpr size_t lastOffset = offset + JSTaggedValue::TaggedTypeSize(); \ diff --git a/runtime/ecma_string.cpp b/runtime/ecma_string.cpp index d62248bc55a5bdd39cc187d8d3029c241a3fefc9..e50c20bea809c41363ae328b277516084cf6eb4b 100644 --- a/runtime/ecma_string.cpp +++ b/runtime/ecma_string.cpp @@ -201,12 +201,12 @@ int32_t EcmaString::IndexOf(const EcmaString *rhs, int32_t pos) const int32_t lhsCount = lhs->GetLength(); int32_t rhsCount = rhs->GetLength(); - if (rhsCount == 0 && pos <= lhsCount) { - return pos; + if (pos > lhsCount) { + return -1; } - if (pos >= lhsCount) { - return -1; + if (rhsCount == 0) { + return pos; } if (pos < 0) { diff --git a/runtime/global_env.h b/runtime/global_env.h index 67c101ada691edd494c2ff5af9500f1da3e5050d..33978dc373792d12922c165138656fa2d1ed95de 100644 --- a/runtime/global_env.h +++ b/runtime/global_env.h @@ -101,6 +101,7 @@ class JSThread; V(JSTaggedValue, IteratorSymbol, ITERATOR_SYMBOL_INDEX) \ V(JSTaggedValue, AsyncIteratorSymbol, ASYNC_ITERATOR_SYMBOL_INDEX) \ V(JSTaggedValue, MatchSymbol, MATCH_SYMBOL_INDEX) \ + V(JSTaggedValue, MatchAllSymbol, MATCH_All_SYMBOL_INDEX) \ V(JSTaggedValue, ReplaceSymbol, REPLACE_SYMBOL_INDEX) \ V(JSTaggedValue, SearchSymbol, SEARCH_SYMBOL_INDEX) \ V(JSTaggedValue, SpeciesSymbol, SPECIES_SYMBOL_INDEX) \ @@ -116,6 +117,7 @@ class JSThread; V(JSTaggedValue, StringIterator, STRING_ITERATOR_INDEX) \ V(JSTaggedValue, MapIteratorPrototype, MAP_ITERATOR_PROTOTYPE_INDEX) \ V(JSTaggedValue, SetIteratorPrototype, SET_ITERATOR_PROTOTYPE_INDEX) \ + V(JSTaggedValue, RegExpIteratorPrototype, REGEXP_ITERATOR_PROTOTYPE_INDEX) \ V(JSTaggedValue, ArrayIteratorPrototype, ARRAY_ITERATOR_PROTOTYPE_INDEX) \ V(JSTaggedValue, StringIteratorPrototype, STRING_ITERATOR_PROTOTYPE_INDEX) \ /* SymbolTable *RegisterSymbols */ \ diff --git a/runtime/global_env_constants.cpp b/runtime/global_env_constants.cpp index 8ecb034f4a6702b296a788186a1ef3e2645e5f7d..5bdfbd97ae36d7f009af07ba8710c4854c15dd1b 100644 --- a/runtime/global_env_constants.cpp +++ b/runtime/global_env_constants.cpp @@ -46,6 +46,7 @@ #include "plugins/ecmascript/runtime/js_proxy.h" #include "plugins/ecmascript/runtime/js_realm.h" #include "plugins/ecmascript/runtime/js_regexp.h" +#include "plugins/ecmascript/runtime/js_regexp_iterator.h" #include "plugins/ecmascript/runtime/js_set.h" #include "plugins/ecmascript/runtime/js_set_iterator.h" #include "plugins/ecmascript/runtime/js_symbol.h" @@ -176,6 +177,11 @@ void GlobalEnvConstants::InitRootsClass([[maybe_unused]] JSThread *thread, JSHCl classInfoExtractorClass->GetHClass()->MarkFieldAsNative(ClassInfoExtractor::CONSTRUCTOR_METHOD_OFFSET); SetConstant(ConstantIndex::CLASS_INFO_EXTRACTOR_HCLASS_INDEX, JSTaggedValue(classInfoExtractorClass)); + JSHClass *jsRegexpIteratorClass = + *factory->NewEcmaDynClass(dynClassClass, JSRegExpIterator::SIZE, JSType::JS_REG_EXP_ITERATOR, 0); + jsRegexpIteratorClass->GetHClass()->MarkFieldAsNative(JSRegExpIterator::BIT_FIELD_OFFSET); + SetConstant(ConstantIndex::JS_REGEXP_ITERATOR_CLASS_INDEX, JSTaggedValue(jsRegexpIteratorClass)); + SetConstant(ConstantIndex::LINKED_HASH_SET_CLASS_INDEX, factory->NewEcmaDynClass(dynClassClass, 0, JSType::LINKED_HASH_SET, HClass::ARRAY).GetTaggedValue()); SetConstant(ConstantIndex::LINKED_HASH_MAP_CLASS_INDEX, @@ -499,6 +505,13 @@ void GlobalEnvConstants::InitGlobalConstant(JSThread *thread) SetConstant(ConstantIndex::ZERO_INDEX, factory->NewFromCanBeCompressString("0").GetTaggedValue()); SetConstant(ConstantIndex::VALUES_INDEX, factory->NewFromCanBeCompressString("values").GetTaggedValue()); + SetConstant(ConstantIndex::FLAGS_INDEX, factory->NewFromCanBeCompressString("flags").GetTaggedValue()); + SetConstant(ConstantIndex::BACKSLASH_INDEX, factory->NewFromCanBeCompressString("/").GetTaggedValue()); + SetConstant(ConstantIndex::G_INDEX, factory->NewFromCanBeCompressString("g").GetTaggedValue()); + SetConstant(ConstantIndex::GROUPS_STRING_INDEX, factory->NewFromCanBeCompressString("groups").GetTaggedValue()); + SetConstant(ConstantIndex::Y_INDEX, factory->NewFromCanBeCompressString("y").GetTaggedValue()); + SetConstant(ConstantIndex::DOLLAR_INDEX, factory->NewFromCanBeCompressString("$").GetTaggedValue()); + auto accessor = factory->NewInternalAccessor(reinterpret_cast(JSFunction::PrototypeSetter), reinterpret_cast(JSFunction::PrototypeGetter)); SetConstant(ConstantIndex::FUNCTION_PROTOTYPE_ACCESSOR, accessor.GetTaggedValue()); diff --git a/runtime/global_env_constants.h b/runtime/global_env_constants.h index d0d7e705c8aa9649d4c50ec6f6adacea81c2837e..a775df693a1631edaa32ff72bbba49e087e3d15c 100644 --- a/runtime/global_env_constants.h +++ b/runtime/global_env_constants.h @@ -73,6 +73,7 @@ class JSThread; V(JSTaggedValue, LinkedHashMapClass, LINKED_HASH_MAP_CLASS_INDEX, ecma_roots_class) \ V(JSTaggedValue, WeakLinkedHashMapClass, WEAK_LINKED_HASH_MAP_CLASS_INDEX, ecma_roots_class) \ V(JSTaggedValue, JSRegExpClass, JS_REGEXP_CLASS_INDEX, ecma_roots_class) \ + V(JSTaggedValue, JSRegExpIteratorClass, JS_REGEXP_ITERATOR_CLASS_INDEX, ecma_roots_class) \ V(JSTaggedValue, ClassInfoExtractorHClass, CLASS_INFO_EXTRACTOR_HCLASS_INDEX, ecma_roots_class) \ V(JSTaggedValue, FinalizationRegistryClass, FINALIZATION_REGISTRY_CLASS_INDEX, ecma_root_class) @@ -298,7 +299,13 @@ class JSThread; V(JSTaggedValue, IndexString, INDEX_INDEX, index) \ V(JSTaggedValue, InputString, INPUT_INDEX, input) \ V(JSTaggedValue, UnicodeString, UNICODE_INDEX, unicode) \ - V(JSTaggedValue, ZeroString, ZERO_INDEX, zero) + V(JSTaggedValue, ZeroString, ZERO_INDEX, zero) \ + V(JSTaggedValue, FlagsString, FLAGS_INDEX, flags) \ + V(JSTaggedValue, BackslashString, BACKSLASH_INDEX, backslash) \ + V(JSTaggedValue, GString, G_INDEX, g) \ + V(JSTaggedValue, GroupsString, GROUPS_STRING_INDEX, groups) \ + V(JSTaggedValue, YString, Y_INDEX, y) \ + V(JSTaggedValue, DollarString, DOLLAR_INDEX, dollar) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define GLOBAL_ENV_CONSTANT_ACCESSOR(V) \ diff --git a/runtime/js_hclass.h b/runtime/js_hclass.h index dfa8e46e7add1b1a104d296f1dba4ce5b749e43d..5912f9bf19e60253492d67a9cc15399f7876d09e 100644 --- a/runtime/js_hclass.h +++ b/runtime/js_hclass.h @@ -97,8 +97,9 @@ class ProtoChangeDetails; JS_WEAK_SET, /* ///////////////////////////////////////////////////////////////////////////////////-PADDING */ \ JS_DATE, /* ///////////////////////////////////////////////////////////////////////////////////-PADDING */ \ JS_ITERATOR, /* ///////////////////////////////////////////////////////////////////////////////////-PADDING */ \ - JS_FORIN_ITERATOR, /* ////////////////////////////////////////////////////////////////////////////-PADDING */ \ - JS_MAP_ITERATOR, /* ////////////////////////////////////////////////////////////////////////////-PADDING */ \ + JS_FORIN_ITERATOR, /* /////////////////////////////////////////////////////////////////////////////-PADDING */ \ + JS_MAP_ITERATOR, /* /////////////////////////////////////////////////////////////////////////////-PADDING */ \ + JS_REG_EXP_ITERATOR, /* ///////////////////////////////////////////////////////////////////////////-PADDING */ \ JS_SET_ITERATOR, /* ////////////////////////////////////////////////////////////////////////////-PADDING */ \ JS_ARRAY_ITERATOR, /* ////////////////////////////////////////////////////////////////////////////-PADDING */ \ JS_STRING_ITERATOR, /* ////////////////////////////////////////////////////////////////////////////-PADDING */ \ @@ -695,6 +696,11 @@ public: return GetObjectType() == JSType::JS_SET_ITERATOR; } + inline bool IsJSRegExpIterator() const + { + return GetObjectType() == JSType::JS_REG_EXP_ITERATOR; + } + inline bool IsJSMapIterator() const { return GetObjectType() == JSType::JS_MAP_ITERATOR; diff --git a/runtime/js_object-inl.h b/runtime/js_object-inl.h index ba5797fae9fb061311a8e2fc012e4001749a7b0a..7ab71cddd8591ad8e95ecc2f4476fd688fe3536b 100644 --- a/runtime/js_object-inl.h +++ b/runtime/js_object-inl.h @@ -156,6 +156,11 @@ inline bool JSObject::IsJSSetIterator() const return GetJSHClass()->IsJSSetIterator(); } +inline bool JSObject::IsJSRegExpIterator() const +{ + return GetJSHClass()->IsJSRegExpIterator(); +} + inline bool JSObject::IsJSMapIterator() const { return GetJSHClass()->IsJSMapIterator(); diff --git a/runtime/js_object.h b/runtime/js_object.h index 7dcab9c4c30b592ecab1fbe40818629e1b895e07..40b610f73adc962ee53d6cbe2e20a2c3f34521ee 100644 --- a/runtime/js_object.h +++ b/runtime/js_object.h @@ -542,6 +542,7 @@ public: bool IsGeneratorObject() const; bool IsForinIterator() const; bool IsJSSetIterator() const; + bool IsJSRegExpIterator() const; bool IsJSMapIterator() const; bool IsJSArrayIterator() const; bool IsJSPrimitiveRef() const; diff --git a/runtime/js_regexp.h b/runtime/js_regexp.h index b8bba30d0508243867ed49f3d4f770a014ecc094..56db1ef120d0819c7dafc1a56930893f5f1a87ff 100644 --- a/runtime/js_regexp.h +++ b/runtime/js_regexp.h @@ -30,7 +30,8 @@ public: ACCESSORS(LastIndex, LAST_INDEX_OFFSET, REGEXP_BYTE_CODE_OFFSET); ACCESSORS(ByteCodeBuffer, REGEXP_BYTE_CODE_OFFSET, ORIGINAL_SOURCE_OFFSET) ACCESSORS(OriginalSource, ORIGINAL_SOURCE_OFFSET, ORIGINAL_FLAGS_OFFSET) - ACCESSORS(OriginalFlags, ORIGINAL_FLAGS_OFFSET, LENGTH_OFFSET) + ACCESSORS(OriginalFlags, ORIGINAL_FLAGS_OFFSET, GROUP_NAME_OFFSET) + ACCESSORS(GroupName, GROUP_NAME_OFFSET, LENGTH_OFFSET) ACCESSORS(Length, LENGTH_OFFSET, SIZE) DECL_VISIT_OBJECT_FOR_JS_OBJECT(JSObject, LAST_INDEX_OFFSET, SIZE) diff --git a/runtime/js_regexp_iterator.cpp b/runtime/js_regexp_iterator.cpp new file mode 100644 index 0000000000000000000000000000000000000000..192741d5a22f84a423cd9b713cbc73dc994f177d --- /dev/null +++ b/runtime/js_regexp_iterator.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2021-2022 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "plugins/ecmascript/runtime/js_regexp_iterator.h" + +#include "plugins/ecmascript/runtime/builtins/builtins_errors.h" +#include "plugins/ecmascript/runtime/builtins/builtins_regexp.h" +#include "plugins/ecmascript/runtime/interpreter/fast_runtime_stub-inl.h" +#include "plugins/ecmascript/runtime/js_tagged_value.h" +#include "plugins/ecmascript/runtime/object_factory.h" + +namespace panda::ecmascript { +using BuiltinsBase = base::BuiltinsBase; +using BuiltinsRegExp = builtins::BuiltinsRegExp; +JSTaggedValue JSRegExpIterator::Next(EcmaRuntimeCallInfo *argv) +{ + ASSERT(argv); + JSThread *thread = argv->GetThread(); + [[maybe_unused]] EcmaHandleScope handleScope(thread); + + // 1. Let O be the this value. + JSHandle input(BuiltinsBase::GetThis(argv)); + // 2. If Type(O) is not Object, throw a TypeError exception. + // 3. If O does not have all of the internal slots of a RegExp String Iterator Object Instance + // (see 21.2.7.2), throw a TypeError exception. + if (!input->IsJSRegExpIterator()) { + THROW_TYPE_ERROR_AND_RETURN(thread, "this value is not a regExp iterator", JSTaggedValue::Exception()); + } + + JSHandle undefinedHandle(thread->GlobalConstants()->GetHandledUndefined()); + JSHandle jsIterator = JSHandle::Cast(input); + // 4. If O.[[Done]] is true, then + // a. Return ! CreateIterResultObject(undefined, true). + if (jsIterator->GetDone()) { + return JSIterator::CreateIterResultObject(thread, undefinedHandle, true).GetTaggedValue(); + } + + // 5. Let R be O.[[IteratingRegExp]]. + // 6. Let S be O.[[IteratedString]]. + // 7. Let global be O.[[Global]]. + // 8. Let fullUnicode be O.[[Unicode]]. + JSHandle regexHandle(thread, jsIterator->GetIteratingRegExp()); + JSHandle inputStr(thread, jsIterator->GetIteratedString()); + bool global = jsIterator->GetGlobal(); + bool fullUnicode = jsIterator->GetUnicode(); + + // 9. Let match be ? RegExpExec(R, S). + JSTaggedValue match = BuiltinsRegExp::RegExpExec(thread, regexHandle, inputStr, false); + JSHandle matchHandle(thread, match); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + + // 10. If match is null, then + // a. Set O.[[Done]] to true. + // b. Return ! CreateIterResultObject(undefined, true). + // Else, + if (matchHandle->IsNull()) { + jsIterator->SetDone(true); + return JSIterator::CreateIterResultObject(thread, undefinedHandle, true).GetTaggedValue(); + } + // a. If global is true, then + // i. Let matchStr be ? ToString(? Get(match, "0")). + if (global) { + const GlobalEnvConstants *globalConstants = thread->GlobalConstants(); + JSHandle zeroString(globalConstants->GetHandledZeroString()); + JSHandle getZero(JSObject::GetProperty(thread, matchHandle, zeroString).GetValue()); + JSHandle matchStr = JSTaggedValue::ToString(thread, getZero); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + // ii. If matchStr is the empty String, then + // 1. Let thisIndex be ? ToLength(? Get(R, "lastIndex")). + // 2. Let nextIndex be ! AdvanceStringIndex(S, thisIndex, fullUnicode). + // 3. Perform ? Set(R, "lastIndex", 𝔽(nextIndex), true). + if (matchStr->GetLength() == 0) { + JSHandle lastIndexString(globalConstants->GetHandledLastIndexString()); + JSHandle getLastIndex( + JSObject::GetProperty(thread, regexHandle, lastIndexString).GetValue()); + JSTaggedNumber thisIndex = JSTaggedValue::ToLength(thread, getLastIndex); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + uint32_t nextIndex = BuiltinsRegExp::AdvanceStringIndex(inputStr, thisIndex.ToUint32(), fullUnicode); + FastRuntimeStub::FastSetPropertyByValue(thread, regexHandle.GetTaggedValue(), + lastIndexString.GetTaggedValue(), JSTaggedValue(nextIndex)); + } + // iii. Return ! CreateIterResultObject(match, false). + return JSIterator::CreateIterResultObject(thread, matchHandle, false).GetTaggedValue(); + } + // b. Else, + // i. Set O.[[Done]] to true. + // ii. Return ! CreateIterResultObject(match, false). + jsIterator->SetDone(true); + return JSIterator::CreateIterResultObject(thread, matchHandle, false).GetTaggedValue(); +} + +JSHandle JSRegExpIterator::CreateRegExpStringIterator(JSThread *thread, + const JSHandle &matcher, + const JSHandle &inputStr, bool global, + bool fullUnicode) +{ + ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); + if (!matcher->IsJSRegExp()) { + JSHandle undefinedHandle(thread->GlobalConstants()->GetHandledUndefined()); + THROW_TYPE_ERROR_AND_RETURN(thread, "matcher is not JSRegExp", undefinedHandle); + } + JSHandle iter(factory->NewJSRegExpIterator(matcher, inputStr, global, fullUnicode)); + return iter; +} +} // namespace panda::ecmascript diff --git a/runtime/js_regexp_iterator.h b/runtime/js_regexp_iterator.h new file mode 100644 index 0000000000000000000000000000000000000000..994d948f221166f24785af5556cd6c17770879c0 --- /dev/null +++ b/runtime/js_regexp_iterator.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ECMASCRIPT_JS_REGEXP_ITERATOR_H +#define ECMASCRIPT_JS_REGEXP_ITERATOR_H + +#include "plugins/ecmascript/runtime/js_object.h" +#include "plugins/ecmascript/runtime/js_iterator.h" + +namespace panda::ecmascript { +class JSRegExpIterator : public JSObject { +public: + CAST_CHECK(JSRegExpIterator, IsJSRegExpIterator); + + // 22.2.7.1 CreateRegExpStringIterator ( R, S, global, fullUnicode ) + static JSHandle CreateRegExpStringIterator(JSThread *thread, const JSHandle &matcher, + const JSHandle &inputStr, bool global, + bool fullUnicode); + + static JSTaggedValue Next(EcmaRuntimeCallInfo *argv); + + static constexpr size_t ITERATING_REGEXP_OFFSET = JSObject::SIZE; + ACCESSORS(IteratingRegExp, ITERATING_REGEXP_OFFSET, ITERATED_STRING_OFFSET); + ACCESSORS(IteratedString, ITERATED_STRING_OFFSET, BIT_FIELD_OFFSET); + ACCESSORS_BIT_FIELD(BitField, BIT_FIELD_OFFSET, LAST_OFFSET) + DEFINE_ALIGN_SIZE(LAST_OFFSET); + + // define BitField + static constexpr size_t REGEXP_GLOBAL_BITS = 1; + static constexpr size_t REGEXP_UNICODE_BITS = 1; + static constexpr size_t REGEXP_DONE_BITS = 1; + FIRST_BIT_FIELD(BitField, Global, bool, REGEXP_GLOBAL_BITS) + NEXT_BIT_FIELD(BitField, Unicode, bool, REGEXP_UNICODE_BITS, Global) + NEXT_BIT_FIELD(BitField, Done, bool, REGEXP_DONE_BITS, Unicode) + + DECL_VISIT_OBJECT_FOR_JS_OBJECT(JSObject, ITERATING_REGEXP_OFFSET, BIT_FIELD_OFFSET) + + DECL_DUMP() +}; +} // namespace panda::ecmascript + +#endif // ECMASCRIPT_JS_REGEXP_ITERATOR_H diff --git a/runtime/js_serializer.h b/runtime/js_serializer.h index 484664557a9b81204f09414e4094dc47e5cd45bb..859165ccd1356c24f60ed641cbf1612d4e25ceee 100644 --- a/runtime/js_serializer.h +++ b/runtime/js_serializer.h @@ -25,7 +25,7 @@ #include "plugins/ecmascript/runtime/js_object.h" #include "plugins/ecmascript/runtime/js_thread.h" #include "plugins/ecmascript/runtime/js_typed_array.h" -#include "plugins/ecmascript/runtime/regexp/dyn_chunk.h" +#include "plugins/ecmascript/runtime/mem/dyn_chunk.h" namespace panda::ecmascript { enum class SerializationUID : uint8_t { diff --git a/runtime/js_tagged_value-inl.h b/runtime/js_tagged_value-inl.h index 67d1db572ae641fe0ace095bdfd702e9639f4fe5..093e8465aa40e68e3ac0f18093953fe859d7ffa2 100644 --- a/runtime/js_tagged_value-inl.h +++ b/runtime/js_tagged_value-inl.h @@ -794,6 +794,11 @@ inline bool JSTaggedValue::IsJSSetIterator() const return IsHeapObject() && GetTaggedObject()->GetClass()->IsJSSetIterator(); } +inline bool JSTaggedValue::IsJSRegExpIterator() const +{ + return IsHeapObject() && GetTaggedObject()->GetClass()->IsJSRegExpIterator(); +} + inline bool JSTaggedValue::IsJSMapIterator() const { return IsHeapObject() && GetTaggedObject()->GetClass()->IsJSMapIterator(); diff --git a/runtime/js_tagged_value.cpp b/runtime/js_tagged_value.cpp index 00944e2946a35992897e3b33b96df259f7276480..568ccfd689af5aeeea729c842bd51944709b632f 100644 --- a/runtime/js_tagged_value.cpp +++ b/runtime/js_tagged_value.cpp @@ -265,7 +265,7 @@ JSTaggedValue JSTaggedValue::ToPrimitive(JSThread *thread, const JSHandleGetEcmaVM(); JSHandle keyString = vm->GetGlobalEnv()->GetToPrimitiveSymbol(); - JSHandle exoticToprim = GetProperty(thread, tagged, keyString).GetValue(); + JSHandle exoticToprim = JSObject::GetMethod(thread, tagged, keyString); RETURN_VALUE_IF_ABRUPT_COMPLETION(thread, JSTaggedValue::Exception()); if (!exoticToprim->IsUndefined()) { JSTaggedValue value = GetTypeString(thread, type).GetTaggedValue(); diff --git a/runtime/js_tagged_value.h b/runtime/js_tagged_value.h index 65334f5bddd7828336b7ca0b6b18603744fa24c4..342c1ac03f0e2fb09b4c04a4c7cb1e068ba0741b 100644 --- a/runtime/js_tagged_value.h +++ b/runtime/js_tagged_value.h @@ -277,6 +277,7 @@ public: bool IsArrayBuffer() const; bool IsJSSetIterator() const; + bool IsJSRegExpIterator() const; bool IsJSMapIterator() const; bool IsJSArrayIterator() const; bool IsIterator() const; diff --git a/runtime/regexp/dyn_chunk.cpp b/runtime/mem/dyn_chunk.cpp similarity index 98% rename from runtime/regexp/dyn_chunk.cpp rename to runtime/mem/dyn_chunk.cpp index ab04b98b82d5cd547914c147bcfd705dcc0c1fff..3416e8f585bd646b982648a661d7d880ec476e2d 100644 --- a/runtime/regexp/dyn_chunk.cpp +++ b/runtime/mem/dyn_chunk.cpp @@ -13,7 +13,7 @@ * limitations under the License. */ -#include "plugins/ecmascript/runtime/regexp/dyn_chunk.h" +#include "plugins/ecmascript/runtime/mem/dyn_chunk.h" #include "runtime/include/runtime.h" #include "securec.h" diff --git a/runtime/regexp/dyn_chunk.h b/runtime/mem/dyn_chunk.h similarity index 100% rename from runtime/regexp/dyn_chunk.h rename to runtime/mem/dyn_chunk.h diff --git a/runtime/mem/object_xray-inl.h b/runtime/mem/object_xray-inl.h index 1e2544a87d1d406327d9bed85cc9a43e3aec5db9..b1410b32c74bc68af0e03ab8c15579e90ddd2acc 100644 --- a/runtime/mem/object_xray-inl.h +++ b/runtime/mem/object_xray-inl.h @@ -46,6 +46,7 @@ #include "plugins/ecmascript/runtime/js_locale.h" #include "plugins/ecmascript/runtime/js_map.h" #include "plugins/ecmascript/runtime/js_map_iterator.h" +#include "plugins/ecmascript/runtime/js_regexp_iterator.h" #include "plugins/ecmascript/runtime/js_number_format.h" #include "plugins/ecmascript/runtime/js_object-inl.h" #include "plugins/ecmascript/runtime/js_plural_rules.h" @@ -153,6 +154,9 @@ void ObjectXRay::VisitObjectBody(TaggedObject *object, JSHClass *klass, const Ec case JSType::JS_SET_ITERATOR: JSSetIterator::Cast(object)->VisitRangeSlot(visitor); break; + case JSType::JS_REG_EXP_ITERATOR: + JSRegExpIterator::Cast(object)->VisitRangeSlot(visitor); + break; case JSType::JS_ARRAY_ITERATOR: JSArrayIterator::Cast(object)->VisitRangeSlot(visitor); break; diff --git a/runtime/object_factory.cpp b/runtime/object_factory.cpp index 45f1fb16f44cde7871d64186d831e7f10c8bf662..e3716c256ee93d6e95906ca0b462fd3208edf86b 100644 --- a/runtime/object_factory.cpp +++ b/runtime/object_factory.cpp @@ -60,6 +60,7 @@ #include "plugins/ecmascript/runtime/js_proxy.h" #include "plugins/ecmascript/runtime/js_realm.h" #include "plugins/ecmascript/runtime/js_regexp.h" +#include "plugins/ecmascript/runtime/js_regexp_iterator.h" #include "plugins/ecmascript/runtime/js_set.h" #include "plugins/ecmascript/runtime/js_set_iterator.h" #include "plugins/ecmascript/runtime/js_string_iterator.h" @@ -784,6 +785,7 @@ JSHandle ObjectFactory::NewJSObjectByConstructor(const JSHandleSetByteCodeBuffer(thread_, JSTaggedValue::Undefined()); JSRegExp::Cast(*obj)->SetOriginalSource(thread_, JSTaggedValue::Undefined()); JSRegExp::Cast(*obj)->SetOriginalFlags(thread_, JSTaggedValue(0)); + JSRegExp::Cast(*obj)->SetGroupName(thread_, JSTaggedValue::Undefined()); JSRegExp::Cast(*obj)->SetLength(thread_, JSTaggedValue(0)); break; case JSType::JS_PRIMITIVE_REF: @@ -847,6 +849,7 @@ JSHandle ObjectFactory::NewJSObjectByConstructor(const JSHandle ObjectFactory::NewJSSetIterator(const JSHandle &s return iter; } +JSHandle ObjectFactory::NewJSRegExpIterator(const JSHandle &matcher, + const JSHandle &inputStr, bool global, + bool fullUnicode) +{ + JSHandle env = vm_->GetGlobalEnv(); + JSHandle protoValue = env->GetRegExpIteratorPrototype(); + // JSHandle dynHandle(thread_->GlobalConstants()->GetHandledJSRegExpIteratorClass()); + // dynHandle->SetPrototype(thread_, protoValue); // TODO(vpukhov): set prototype once + JSHandle dynHandle = NewEcmaDynClass(JSRegExpIterator::SIZE, JSType::JS_REG_EXP_ITERATOR, protoValue); + dynHandle->GetHClass()->MarkFieldAsNative(JSRegExpIterator::BIT_FIELD_OFFSET); + JSHandle iter(NewJSObject(dynHandle)); + iter->GetJSHClass()->SetExtensible(true); + iter->SetIteratingRegExp(thread_, matcher.GetTaggedValue()); + iter->SetIteratedString(thread_, inputStr.GetTaggedValue()); + iter->SetGlobal(global); + iter->SetUnicode(fullUnicode); + iter->SetDone(false); + return iter; +} + JSHandle ObjectFactory::NewJSMapIterator(const JSHandle &map, IterationKind kind) { JSHandle env = vm_->GetGlobalEnv(); diff --git a/runtime/object_factory.h b/runtime/object_factory.h index b4bb35605a35b4ed95800ffb042ad2f0f33d8f62..0387b981ef0ecd866a4b6ed56348be29dae5f1cc 100644 --- a/runtime/object_factory.h +++ b/runtime/object_factory.h @@ -56,6 +56,7 @@ class JSMap; class JSWeakRef; class JSRegExp; class JSSetIterator; +class JSRegExpIterator; class JSMapIterator; class JSArrayIterator; class JSStringIterator; @@ -286,6 +287,9 @@ public: JSHandle NewJSSetIterator(const JSHandle &set, IterationKind kind); + JSHandle NewJSRegExpIterator(const JSHandle &matcher, + const JSHandle &inputStr, bool global, bool fullUnicode); + JSHandle NewJSMapIterator(const JSHandle &map, IterationKind kind); JSHandle NewJSArrayIterator(const JSHandle &array, IterationKind kind); diff --git a/runtime/regexp/regexp_executor.cpp b/runtime/regexp/regexp_executor.cpp index 8e550cc5812c14723d313435e64ed7406a6c6a38..20b8ac2edf8a59bf76e13507ce46b651d690ffc5 100644 --- a/runtime/regexp/regexp_executor.cpp +++ b/runtime/regexp/regexp_executor.cpp @@ -16,7 +16,7 @@ #include "plugins/ecmascript/runtime/regexp/regexp_executor.h" #include "plugins/ecmascript/runtime/base/string_helper.h" -#include "plugins/ecmascript/runtime/regexp/dyn_chunk.h" +#include "plugins/ecmascript/runtime/mem/dyn_chunk.h" #include "plugins/ecmascript/runtime/regexp/regexp_opcode.h" #include "securec.h" @@ -103,371 +103,6 @@ bool RegExpExecutor::MatchFailed(bool isMatched) return true; } -bool RegExpExecutor::HandleFirstSplit() -{ - if (GetCurrentPC() == RegExpParser::OP_START_OFFSET && stateStackLen_ == 0 && - (flags_ & RegExpParser::FLAG_STICKY) == 0) { - if (IsEOF()) { - if (MatchFailed()) { - return false; - } - } else { - AdvanceCurrentPtr(); - PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET); - } - } - return true; -} - -bool RegExpExecutor::HandleOpAll(uint8_t opCode) -{ - if (IsEOF()) { - return !MatchFailed(); - } - uint32_t currentChar = GetCurrentChar(); - if ((opCode == RegExpOpCode::OP_DOTS) && IsTerminator(currentChar)) { - return !MatchFailed(); - } - Advance(opCode); - return true; -} - -bool RegExpExecutor::HandleOpChar(const DynChunk &byteCode, uint8_t opCode) -{ - uint32_t expectedChar; - if (opCode == RegExpOpCode::OP_CHAR32) { - expectedChar = byteCode.GetU32(GetCurrentPC() + 1); - } else { - expectedChar = byteCode.GetU16(GetCurrentPC() + 1); - } - if (IsEOF()) { - return !MatchFailed(); - } - uint32_t currentChar = GetCurrentChar(); - if (IsIgnoreCase()) { - currentChar = RegExpParser::Canonicalize(currentChar, IsUtf16()); - } - if (currentChar == expectedChar) { - Advance(opCode); - } else { - if (MatchFailed()) { - return false; - } - } - return true; -} - -bool RegExpExecutor::HandleOpWordBoundary(uint8_t opCode) -{ - if (IsEOF()) { - if (opCode == RegExpOpCode::OP_WORD_BOUNDARY) { - Advance(opCode); - } else { - if (MatchFailed()) { - return false; - } - } - return true; - } - bool preIsWord = false; - if (GetCurrentPtr() != input_) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - preIsWord = IsWordChar(PeekPrevChar(currentPtr_, input_)); - } - bool currentIsWord = IsWordChar(PeekChar(currentPtr_, inputEnd_)); - if (((opCode == RegExpOpCode::OP_WORD_BOUNDARY) && - ((!preIsWord && currentIsWord) || (preIsWord && !currentIsWord))) || - ((opCode == RegExpOpCode::OP_NOT_WORD_BOUNDARY) && - ((preIsWord && currentIsWord) || (!preIsWord && !currentIsWord)))) { - Advance(opCode); - } else { - if (MatchFailed()) { - return false; - } - } - return true; -} - -bool RegExpExecutor::HandleOpLineStart(uint8_t opCode) -{ - if (IsEOF()) { - return !MatchFailed(); - } - if ((GetCurrentPtr() == input_) || - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekPrevChar(currentPtr_, input_) == '\n')) { - Advance(opCode); - } else { - if (MatchFailed()) { - return false; - } - } - return true; -} - -bool RegExpExecutor::HandleOpLineEnd(uint8_t opCode) -{ - if (IsEOF() || - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekChar(currentPtr_, inputEnd_) == '\n')) { - Advance(opCode); - } else { - if (MatchFailed()) { - return false; - } - } - return true; -} - -void RegExpExecutor::HandleOpSaveStart(const DynChunk &byteCode, uint8_t opCode) -{ - uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); - ASSERT(captureIndex < nCapture_); - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - CaptureState *captureState = &captureResultList_[captureIndex]; - captureState->captureStart = GetCurrentPtr(); - Advance(opCode); -} - -void RegExpExecutor::HandleOpSaveEnd(const DynChunk &byteCode, uint8_t opCode) -{ - uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); - ASSERT(captureIndex < nCapture_); - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - CaptureState *captureState = &captureResultList_[captureIndex]; - captureState->captureEnd = GetCurrentPtr(); - Advance(opCode); -} - -void RegExpExecutor::HandleOpSaveReset(const DynChunk &byteCode, uint8_t opCode) -{ - uint32_t catpureStartIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_START); - uint32_t catpureEndIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_END); - for (uint32_t i = catpureStartIndex; i <= catpureEndIndex; i++) { - CaptureState *captureState = &captureResultList_[i]; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - captureState->captureStart = nullptr; - captureState->captureEnd = nullptr; - } - Advance(opCode); -} - -void RegExpExecutor::HandleOpMatch(const DynChunk &byteCode, uint8_t opCode) -{ - auto type = static_cast(opCode - RegExpOpCode::OP_SPLIT_NEXT); - ASSERT(type == STATE_SPLIT || type == STATE_MATCH_AHEAD || type == STATE_NEGATIVE_MATCH_AHEAD); - uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1); - Advance(opCode); - uint32_t splitPc = GetCurrentPC() + offset; - PushRegExpState(type, splitPc); -} - -void RegExpExecutor::HandleOpSplitFirst(const DynChunk &byteCode, uint8_t opCode) -{ - uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1); - Advance(opCode); - PushRegExpState(STATE_SPLIT, GetCurrentPC()); - AdvanceOffset(offset); -} - -bool RegExpExecutor::HandleOpPrev(uint8_t opCode) -{ - if (GetCurrentPtr() == input_) { - if (MatchFailed()) { - return false; - } - } else { - PrevPtr(¤tPtr_, input_); - Advance(opCode); - } - return true; -} - -void RegExpExecutor::HandleOpLoop(const DynChunk &byteCode, uint8_t opCode) -{ - uint32_t quantifyMin = byteCode.GetU32(GetCurrentPC() + LOOP_MIN_OFFSET); - uint32_t quantifyMax = byteCode.GetU32(GetCurrentPC() + LOOP_MAX_OFFSET); - uint32_t pcOffset = byteCode.GetU32(GetCurrentPC() + LOOP_PC_OFFSET); - Advance(opCode); - uint32_t loopPcEnd = GetCurrentPC(); - uint32_t loopPcStart = GetCurrentPC() + pcOffset; - bool isGreedy = opCode == RegExpOpCode::OP_LOOP_GREEDY; - uint32_t loopMax = isGreedy ? quantifyMax : quantifyMin; - - uint32_t loopCount = PeekStack(); - SetStackValue(++loopCount); - if (loopCount < loopMax) { - // greedy failed, goto next - if (loopCount >= quantifyMin) { - PushRegExpState(STATE_SPLIT, loopPcEnd); - } - // Goto loop start - SetCurrentPC(loopPcStart); - } else { - if (!isGreedy && (loopCount < quantifyMax)) { - PushRegExpState(STATE_SPLIT, loopPcStart); - } - } -} - -bool RegExpExecutor::HandleOpRange32(const DynChunk &byteCode) -{ - if (IsEOF()) { - return !MatchFailed(); - } - uint32_t currentChar = GetCurrentChar(); - if (IsIgnoreCase()) { - currentChar = RegExpParser::Canonicalize(currentChar, IsUtf16()); - } - uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1); - bool isFound = false; - int32_t idxMin = 0; - int32_t idxMax = rangeCount - 1; - int32_t idx = 0; - uint32_t low = 0; - uint32_t high = - byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_OFFSET + RANGE32_MAX_HALF_OFFSET); - if (currentChar <= high) { - while (idxMin <= idxMax) { - idx = (idxMin + idxMax) / RANGE32_OFFSET; - low = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idx * RANGE32_MAX_OFFSET); - high = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idx * RANGE32_MAX_OFFSET + - RANGE32_MAX_HALF_OFFSET); - if (currentChar < low) { - idxMax = idx - 1; - } else if (currentChar > high) { - idxMin = idx + 1; - } else { - isFound = true; - break; - } - } - } - if (isFound) { - AdvanceOffset(rangeCount * RANGE32_MAX_OFFSET + RANGE32_HEAD_OFFSET); - } else { - if (MatchFailed()) { - return false; - } - } - return true; -} - -bool RegExpExecutor::HandleOpRange(const DynChunk &byteCode) -{ - if (IsEOF()) { - return !MatchFailed(); - } - uint32_t currentChar = GetCurrentChar(); - if (IsIgnoreCase()) { - currentChar = RegExpParser::Canonicalize(currentChar, IsUtf16()); - } - uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1); - bool isFound = false; - int32_t idxMin = 0; - int32_t idxMax = rangeCount - 1; - int32_t idx = 0; - uint32_t low = 0; - uint32_t high = - byteCode.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET); - if (currentChar <= high) { - while (idxMin <= idxMax) { - idx = (idxMin + idxMax) / RANGE32_OFFSET; - low = byteCode.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + idx * RANGE32_MAX_HALF_OFFSET); - high = - byteCode.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + idx * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET); - if (currentChar < low) { - idxMax = idx - 1; - } else if (currentChar > high) { - idxMin = idx + 1; - } else { - isFound = true; - break; - } - } - } - if (isFound) { - AdvanceOffset(rangeCount * RANGE32_MAX_HALF_OFFSET + RANGE32_HEAD_OFFSET); - } else { - if (MatchFailed()) { - return false; - } - } - return true; -} - -bool RegExpExecutor::HandleOpBackReference(const DynChunk &byteCode, uint8_t opCode) -{ - uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); - if (captureIndex >= nCapture_) { - return !MatchFailed(); - } - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - const uint8_t *captureStart = captureResultList_[captureIndex].captureStart; - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - const uint8_t *captureEnd = captureResultList_[captureIndex].captureEnd; - if (captureStart == nullptr || captureEnd == nullptr) { - Advance(opCode); - return true; - } - bool isMatched = true; - if (opCode == RegExpOpCode::OP_BACKREFERENCE) { - const uint8_t *refCptr = captureStart; - while (refCptr < captureEnd) { - if (IsEOF()) { - isMatched = false; - break; - } - // NOLINTNEXTLINE(readability-identifier-naming) - uint32_t c1 = GetChar(&refCptr, captureEnd); - // NOLINTNEXTLINE(readability-identifier-naming) - uint32_t c2 = GetChar(¤tPtr_, inputEnd_); - if (IsIgnoreCase()) { - c1 = RegExpParser::Canonicalize(c1, IsUtf16()); - c2 = RegExpParser::Canonicalize(c2, IsUtf16()); - } - if (c1 != c2) { - isMatched = false; - break; - } - } - if (!isMatched) { - if (MatchFailed()) { - return false; - } - } else { - Advance(opCode); - } - } else { - const uint8_t *refCptr = captureEnd; - while (refCptr > captureStart) { - if (GetCurrentPtr() == input_) { - isMatched = false; - break; - } - // NOLINTNEXTLINE(readability-identifier-naming) - uint32_t c1 = GetPrevChar(&refCptr, captureStart); - // NOLINTNEXTLINE(readability-identifier-naming) - uint32_t c2 = GetPrevChar(¤tPtr_, input_); - if (IsIgnoreCase()) { - c1 = RegExpParser::Canonicalize(c1, IsUtf16()); - c2 = RegExpParser::Canonicalize(c2, IsUtf16()); - } - if (c1 != c2) { - isMatched = false; - break; - } - } - if (!isMatched) { - if (MatchFailed()) { - return false; - } - } else { - Advance(opCode); - } - } - return true; -} - // NOLINTNEXTLINE(readability-function-size) bool RegExpExecutor::ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd) { @@ -479,33 +114,38 @@ bool RegExpExecutor::ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd) uint8_t opCode = byteCode.GetU8(GetCurrentPC()); switch (opCode) { case RegExpOpCode::OP_DOTS: - case RegExpOpCode::OP_ALL: + case RegExpOpCode::OP_ALL: { if (!HandleOpAll(opCode)) { return false; } break; + } case RegExpOpCode::OP_CHAR32: - case RegExpOpCode::OP_CHAR: + case RegExpOpCode::OP_CHAR: { if (!HandleOpChar(byteCode, opCode)) { return false; } break; + } case RegExpOpCode::OP_NOT_WORD_BOUNDARY: - case RegExpOpCode::OP_WORD_BOUNDARY: + case RegExpOpCode::OP_WORD_BOUNDARY: { if (!HandleOpWordBoundary(opCode)) { return false; } break; - case RegExpOpCode::OP_LINE_START: + } + case RegExpOpCode::OP_LINE_START: { if (!HandleOpLineStart(opCode)) { return false; } break; - case RegExpOpCode::OP_LINE_END: + } + case RegExpOpCode::OP_LINE_END: { if (!HandleOpLineEnd(opCode)) { return false; } break; + } case RegExpOpCode::OP_SAVE_START: HandleOpSaveStart(byteCode, opCode); break; @@ -515,16 +155,17 @@ bool RegExpExecutor::ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd) case RegExpOpCode::OP_GOTO: { uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1); Advance(opCode, offset); - } break; + break; + } case RegExpOpCode::OP_MATCH: { // jump to match ahead if (MatchFailed(true)) { return false; } - } break; - case RegExpOpCode::OP_MATCH_END: { + break; + } + case RegExpOpCode::OP_MATCH_END: return true; - } break; case RegExpOpCode::OP_SAVE_RESET: HandleOpSaveReset(byteCode, opCode); break; @@ -536,11 +177,12 @@ bool RegExpExecutor::ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd) case RegExpOpCode::OP_SPLIT_FIRST: HandleOpSplitFirst(byteCode, opCode); break; - case RegExpOpCode::OP_PREV: + case RegExpOpCode::OP_PREV: { if (!HandleOpPrev(opCode)) { return false; } break; + } case RegExpOpCode::OP_LOOP_GREEDY: case RegExpOpCode::OP_LOOP: HandleOpLoop(byteCode, opCode); @@ -548,7 +190,8 @@ bool RegExpExecutor::ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd) case RegExpOpCode::OP_PUSH_CHAR: { PushStack(reinterpret_cast(GetCurrentPtr())); Advance(opCode); - } break; + break; + } case RegExpOpCode::OP_CHECK_CHAR: { if (PopStack() != reinterpret_cast(GetCurrentPtr())) { Advance(opCode); @@ -556,31 +199,37 @@ bool RegExpExecutor::ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd) uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1); Advance(opCode, offset); } - } break; + break; + } case RegExpOpCode::OP_PUSH: { PushStack(0); Advance(opCode); - } break; + break; + } case RegExpOpCode::OP_POP: { PopStack(); Advance(opCode); - } break; - case RegExpOpCode::OP_RANGE32: + break; + } + case RegExpOpCode::OP_RANGE32: { if (!HandleOpRange32(byteCode)) { return false; } break; - case RegExpOpCode::OP_RANGE: + } + case RegExpOpCode::OP_RANGE: { if (!HandleOpRange(byteCode)) { return false; } break; + } case RegExpOpCode::OP_BACKREFERENCE: - case RegExpOpCode::OP_BACKWARD_BACKREFERENCE: + case RegExpOpCode::OP_BACKWARD_BACKREFERENCE: { if (!HandleOpBackReference(byteCode, opCode)) { return false; } break; + } default: UNREACHABLE(); } @@ -628,8 +277,8 @@ MatchResult RegExpExecutor::GetResult(const JSThread *thread, bool isSuccess) co pair.first = false; if (isWideChar_) { // create utf-16 string - pair.second = factory->NewFromUtf16UnCheck( - reinterpret_cast(captureState->captureStart), len / 2, false); + pair.second = + factory->NewFromUtf16(reinterpret_cast(captureState->captureStart), len / 2); } else { // create utf-8 string PandaVector buffer(len + 1); @@ -640,8 +289,7 @@ MatchResult RegExpExecutor::GetResult(const JSThread *thread, bool isSuccess) co UNREACHABLE(); } dest[len] = '\0'; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - pair.second = - factory->NewFromUtf8UnCheck(reinterpret_cast(buffer.data()), len, true); + pair.second = factory->NewFromUtf8(reinterpret_cast(buffer.data()), len); } } else { // undefined @@ -661,9 +309,8 @@ MatchResult RegExpExecutor::GetResult(const JSThread *thread, bool isSuccess) co void RegExpExecutor::PushRegExpState(StateType type, uint32_t pc) { ReAllocStack(stateStackLen_ + 1); - auto state = - reinterpret_cast(stateStack_ + // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - stateStackLen_ * stateSize_); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + auto state = reinterpret_cast(stateStack_ + stateStackLen_ * stateSize_); state->type_ = type; state->currentPc_ = pc; state->currentStack_ = currentStack_; @@ -673,9 +320,8 @@ void RegExpExecutor::PushRegExpState(StateType type, uint32_t pc) LOG_ECMA(FATAL) << "memcpy_s failed"; UNREACHABLE(); } - uint8_t *stackStart = - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - reinterpret_cast(state->captureResultList_) + sizeof(CaptureState) * nCapture_; + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + uint8_t *stackStart = reinterpret_cast(state->captureResultList_) + sizeof(CaptureState) * nCapture_; if (stack_ != nullptr) { size_t stackSize = sizeof(uintptr_t) * nStack_; if (memcpy_s(stackStart, stackSize, stack_, stackSize) != EOK) { diff --git a/runtime/regexp/regexp_executor.h b/runtime/regexp/regexp_executor.h index a3c89828333f313b72bb85acaec16c1490a23f7f..3cda6c0f2520fa5d5b11138cd53cd6fc905863b4 100644 --- a/runtime/regexp/regexp_executor.h +++ b/runtime/regexp/regexp_executor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Huawei Device Co., Ltd. + * Copyright (c) 2021 Huawei Device Co., Ltd. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -64,26 +64,377 @@ public: bool Execute(const uint8_t *input, uint32_t lastIndex, uint32_t length, uint8_t *buf, bool isWideChar = false); bool ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd); - bool HandleFirstSplit(); - bool HandleOpAll(uint8_t opCode); - bool HandleOpChar(const DynChunk &byteCode, uint8_t opCode); - bool HandleOpWordBoundary(uint8_t opCode); - bool HandleOpLineStart(uint8_t opCode); - bool HandleOpLineEnd(uint8_t opCode); - void HandleOpSaveStart(const DynChunk &byteCode, uint8_t opCode); - void HandleOpSaveEnd(const DynChunk &byteCode, uint8_t opCode); - void HandleOpSaveReset(const DynChunk &byteCode, uint8_t opCode); - void HandleOpMatch(const DynChunk &byteCode, uint8_t opCode); - void HandleOpSplitFirst(const DynChunk &byteCode, uint8_t opCode); - bool HandleOpPrev(uint8_t opCode); - void HandleOpLoop(const DynChunk &byteCode, uint8_t opCode); - bool HandleOpRange32(const DynChunk &byteCode); - bool HandleOpRange(const DynChunk &byteCode); - bool HandleOpBackReference(const DynChunk &byteCode, uint8_t opCode); + inline bool HandleFirstSplit() + { + if (GetCurrentPC() == RegExpParser::OP_START_OFFSET && stateStackLen_ == 0 && + (flags_ & RegExpParser::FLAG_STICKY) == 0) { + if (IsEOF()) { + if (MatchFailed()) { + return false; + } + } else { + AdvanceCurrentPtr(); + PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET); + } + } + return true; + } + + inline bool HandleOpAll(uint8_t opCode) + { + if (IsEOF()) { + return !MatchFailed(); + } + uint32_t currentChar = GetCurrentChar(); + if ((opCode == RegExpOpCode::OP_DOTS) && IsTerminator(currentChar)) { + return !MatchFailed(); + } + Advance(opCode); + return true; + } + + inline bool HandleOpChar(const DynChunk &byteCode, uint8_t opCode) + { + uint32_t expectedChar; + if (opCode == RegExpOpCode::OP_CHAR32) { + expectedChar = byteCode.GetU32(GetCurrentPC() + 1); + } else { + expectedChar = byteCode.GetU16(GetCurrentPC() + 1); + } + if (IsEOF()) { + return !MatchFailed(); + } + uint32_t currentChar = GetCurrentChar(); + if (IsIgnoreCase()) { + currentChar = static_cast(RegExpParser::Canonicalize(currentChar, IsUtf16())); + } + if (currentChar == expectedChar) { + Advance(opCode); + } else { + if (MatchFailed()) { + return false; + } + } + return true; + } + + inline bool HandleOpWordBoundary(uint8_t opCode) + { + if (IsEOF()) { + if (opCode == RegExpOpCode::OP_WORD_BOUNDARY) { + Advance(opCode); + } else { + if (MatchFailed()) { + return false; + } + } + return true; + } + bool preIsWord = false; + if (GetCurrentPtr() != input_) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + preIsWord = IsWordChar(PeekPrevChar(currentPtr_, input_)); + } + bool currentIsWord = IsWordChar(PeekChar(currentPtr_, inputEnd_)); + if (((opCode == RegExpOpCode::OP_WORD_BOUNDARY) && + ((!preIsWord && currentIsWord) || (preIsWord && !currentIsWord))) || + ((opCode == RegExpOpCode::OP_NOT_WORD_BOUNDARY) && + ((preIsWord && currentIsWord) || (!preIsWord && !currentIsWord)))) { + Advance(opCode); + } else { + if (MatchFailed()) { + return false; + } + } + return true; + } + + inline bool HandleOpLineStart(uint8_t opCode) + { + if (IsEOF()) { + return !MatchFailed(); + } + if ((GetCurrentPtr() == input_) || + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekPrevChar(currentPtr_, input_) == '\n')) { + Advance(opCode); + } else { + if (MatchFailed()) { + return false; + } + } + return true; + } + + inline bool HandleOpLineEnd(uint8_t opCode) + { + if (IsEOF() || + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekChar(currentPtr_, inputEnd_) == '\n')) { + Advance(opCode); + } else { + if (MatchFailed()) { + return false; + } + } + return true; + } + + inline void HandleOpSaveStart(const DynChunk &byteCode, uint8_t opCode) + { + uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); + ASSERT(captureIndex < nCapture_); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + CaptureState *captureState = &captureResultList_[captureIndex]; + captureState->captureStart = GetCurrentPtr(); + Advance(opCode); + } + + inline void HandleOpSaveEnd(const DynChunk &byteCode, uint8_t opCode) + { + uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); + ASSERT(captureIndex < nCapture_); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + CaptureState *captureState = &captureResultList_[captureIndex]; + captureState->captureEnd = GetCurrentPtr(); + Advance(opCode); + } + + inline void HandleOpSaveReset(const DynChunk &byteCode, uint8_t opCode) + { + uint32_t catpureStartIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_START); + uint32_t catpureEndIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_END); + for (uint32_t i = catpureStartIndex; i <= catpureEndIndex; i++) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + CaptureState *captureState = &captureResultList_[i]; + captureState->captureStart = nullptr; + captureState->captureEnd = nullptr; + } + Advance(opCode); + } + + inline void HandleOpMatch(const DynChunk &byteCode, uint8_t opCode) + { + auto type = static_cast(opCode - RegExpOpCode::OP_SPLIT_NEXT); + ASSERT(type == STATE_SPLIT || type == STATE_MATCH_AHEAD || type == STATE_NEGATIVE_MATCH_AHEAD); + uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1); + Advance(opCode); + uint32_t splitPc = GetCurrentPC() + offset; + PushRegExpState(type, splitPc); + } + + inline void HandleOpSplitFirst(const DynChunk &byteCode, uint8_t opCode) + { + uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1); + Advance(opCode); + PushRegExpState(STATE_SPLIT, GetCurrentPC()); + AdvanceOffset(offset); + } + + inline bool HandleOpPrev(uint8_t opCode) + { + if (GetCurrentPtr() == input_) { + if (MatchFailed()) { + return false; + } + } else { + PrevPtr(¤tPtr_, input_); + Advance(opCode); + } + return true; + } + + inline void HandleOpLoop(const DynChunk &byteCode, uint8_t opCode) + { + uint32_t quantifyMin = byteCode.GetU32(GetCurrentPC() + LOOP_MIN_OFFSET); + uint32_t quantifyMax = byteCode.GetU32(GetCurrentPC() + LOOP_MAX_OFFSET); + uint32_t pcOffset = byteCode.GetU32(GetCurrentPC() + LOOP_PC_OFFSET); + Advance(opCode); + uint32_t loopPcEnd = GetCurrentPC(); + uint32_t loopPcStart = GetCurrentPC() + pcOffset; + bool isGreedy = opCode == RegExpOpCode::OP_LOOP_GREEDY; + uint32_t loopMax = isGreedy ? quantifyMax : quantifyMin; + + uint32_t loopCount = PeekStack(); + SetStackValue(++loopCount); + if (loopCount < loopMax) { + // greedy failed, goto next + if (loopCount >= quantifyMin) { + PushRegExpState(STATE_SPLIT, loopPcEnd); + } + // Goto loop start + SetCurrentPC(loopPcStart); + } else { + if (!isGreedy && (loopCount < quantifyMax)) { + PushRegExpState(STATE_SPLIT, loopPcStart); + } + } + } + + inline bool HandleOpRange32(const DynChunk &byteCode) + { + if (IsEOF()) { + return !MatchFailed(); + } + uint32_t currentChar = GetCurrentChar(); + if (IsIgnoreCase()) { + currentChar = static_cast(RegExpParser::Canonicalize(currentChar, IsUtf16())); + } + uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1); + bool isFound = false; + int32_t idxMin = 0; + int32_t idxMax = static_cast(rangeCount) - 1; + int32_t idx = 0; + uint32_t low = 0; + uint32_t high = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_OFFSET + + RANGE32_MAX_HALF_OFFSET); + if (currentChar <= high) { + while (idxMin <= idxMax) { + idx = (idxMin + idxMax) / RANGE32_OFFSET; + low = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + + static_cast(idx) * RANGE32_MAX_OFFSET); + high = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + + static_cast(idx) * RANGE32_MAX_OFFSET + RANGE32_MAX_HALF_OFFSET); + if (currentChar < low) { + idxMax = idx - 1; + } else if (currentChar > high) { + idxMin = idx + 1; + } else { + isFound = true; + break; + } + } + } + if (isFound) { + AdvanceOffset(rangeCount * RANGE32_MAX_OFFSET + RANGE32_HEAD_OFFSET); + } else { + if (MatchFailed()) { + return false; + } + } + return true; + } + + inline bool HandleOpRange(const DynChunk &byteCode) + { + if (IsEOF()) { + return !MatchFailed(); + } + uint32_t currentChar = GetCurrentChar(); + if (IsIgnoreCase()) { + currentChar = static_cast(RegExpParser::Canonicalize(currentChar, IsUtf16())); + } + uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1); + bool isFound = false; + int32_t idxMin = 0; + int32_t idxMax = rangeCount - 1; + int32_t idx = 0; + uint32_t low = 0; + uint32_t high = + byteCode.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET); + if (currentChar <= high) { + while (idxMin <= idxMax) { + idx = (idxMin + idxMax) / RANGE32_OFFSET; + low = byteCode.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + + static_cast(idx) * RANGE32_MAX_HALF_OFFSET); + high = byteCode.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + + static_cast(idx) * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET); + if (currentChar < low) { + idxMax = idx - 1; + } else if (currentChar > high) { + idxMin = idx + 1; + } else { + isFound = true; + break; + } + } + } + if (isFound) { + AdvanceOffset(rangeCount * RANGE32_MAX_HALF_OFFSET + RANGE32_HEAD_OFFSET); + } else { + if (MatchFailed()) { + return false; + } + } + return true; + } + + inline bool HandleOpBackReference(const DynChunk &byteCode, uint8_t opCode) + { + uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); + if (captureIndex >= nCapture_) { + return !MatchFailed(); + } + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + const uint8_t *captureStart = captureResultList_[captureIndex].captureStart; + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + const uint8_t *captureEnd = captureResultList_[captureIndex].captureEnd; + if (captureStart == nullptr || captureEnd == nullptr) { + Advance(opCode); + return true; + } + bool isMatched = true; + if (opCode == RegExpOpCode::OP_BACKREFERENCE) { + const uint8_t *refCptr = captureStart; + while (refCptr < captureEnd) { + if (IsEOF()) { + isMatched = false; + break; + } + // NOLINTNEXTLINE(readability-identifier-naming) + uint32_t c1 = GetChar(&refCptr, captureEnd); + // NOLINTNEXTLINE(readability-identifier-naming) + uint32_t c2 = GetChar(¤tPtr_, inputEnd_); + if (IsIgnoreCase()) { + c1 = static_cast(RegExpParser::Canonicalize(c1, IsUtf16())); + c2 = static_cast(RegExpParser::Canonicalize(c2, IsUtf16())); + } + if (c1 != c2) { + isMatched = false; + break; + } + } + if (!isMatched) { + if (MatchFailed()) { + return false; + } + } else { + Advance(opCode); + } + } else { + const uint8_t *refCptr = captureEnd; + while (refCptr > captureStart) { + if (GetCurrentPtr() == input_) { + isMatched = false; + break; + } + // NOLINTNEXTLINE(readability-identifier-naming) + uint32_t c1 = GetPrevChar(&refCptr, captureStart); + // NOLINTNEXTLINE(readability-identifier-naming) + uint32_t c2 = GetPrevChar(¤tPtr_, input_); + if (IsIgnoreCase()) { + c1 = static_cast(RegExpParser::Canonicalize(c1, IsUtf16())); + c2 = static_cast(RegExpParser::Canonicalize(c2, IsUtf16())); + } + if (c1 != c2) { + isMatched = false; + break; + } + } + if (!isMatched) { + if (MatchFailed()) { + return false; + } + } else { + Advance(opCode); + } + } + return true; + } inline void Advance(uint8_t opCode, uint32_t offset = 0) { - currentPc_ += offset + RegExpOpCode::GetRegExpOpCode(opCode)->GetSize(); + currentPc_ += offset + static_cast(RegExpOpCode::GetRegExpOpCode(opCode)->GetSize()); } inline void AdvanceOffset(uint32_t offset) @@ -110,14 +461,14 @@ public: c = *cptr; *pp += 1; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) } else { - uint16_t c1 = *(uint16_t *)cptr; // NOLINT(cppcoreguidelines-pro-type-cstyle-cast) + uint16_t c1 = *(reinterpret_cast(cptr)); c = c1; cptr += WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) { - c1 = *(uint16_t *)cptr; // NOLINT(cppcoreguidelines-pro-type-cstyle-cast) + c1 = *(reinterpret_cast(cptr)); if (U16_IS_TRAIL(c1)) { - c = U16_GET_SUPPLEMENTARY(c, c1); // NOLINT(hicpp-signed-bitwise) - cptr += WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + c = static_cast(U16_GET_SUPPLEMENTARY(c, c1)); // NOLINT(hicpp-signed-bitwise) + cptr += WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) } } *pp = cptr; @@ -132,13 +483,13 @@ public: if (!isWideChar_) { c = *cptr; } else { - uint16_t c1 = *(uint16_t *)cptr; // NOLINT(cppcoreguidelines-pro-type-cstyle-cast) + uint16_t c1 = *reinterpret_cast(cptr); c = c1; cptr += WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) { - c1 = *(uint16_t *)cptr; // NOLINT(cppcoreguidelines-pro-type-cstyle-cast) + c1 = *reinterpret_cast(cptr); if (U16_IS_TRAIL(c1)) { - c = U16_GET_SUPPLEMENTARY(c, c1); // NOLINT(hicpp-signed-bitwise) + c = static_cast(U16_GET_SUPPLEMENTARY(c, c1)); // NOLINT(hicpp-signed-bitwise) } } } @@ -151,10 +502,10 @@ public: if (!isWideChar_) { *pp += 1; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) } else { - uint16_t c1 = *(uint16_t *)cptr; // NOLINT(cppcoreguidelines-pro-type-cstyle-cast) - cptr += WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + uint16_t c1 = *reinterpret_cast(cptr); + cptr += WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) if (U16_IS_LEAD(c1) && IsUtf16() && cptr < end) { - c1 = *(uint16_t *)cptr; // NOLINT(cppcoreguidelines-pro-type-cstyle-cast) + c1 = *reinterpret_cast(cptr); if (U16_IS_TRAIL(c1)) { cptr += WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) } @@ -168,16 +519,16 @@ public: uint32_t c; const uint8_t *cptr = p; if (!isWideChar_) { - c = *(cptr - 1); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + c = cptr[-1]; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) } else { cptr -= WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) uint16_t c1 = *reinterpret_cast(cptr); c = c1; if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - c1 = (reinterpret_cast(cptr))[-1]; + c1 = reinterpret_cast(cptr)[-1]; if (U16_IS_LEAD(c1)) { - c = U16_GET_SUPPLEMENTARY(c1, c); // NOLINT(hicpp-signed-bitwise) + c = static_cast(U16_GET_SUPPLEMENTARY(c1, c)); // NOLINT(hicpp-signed-bitwise) } } } @@ -189,20 +540,19 @@ public: uint32_t c; const uint8_t *cptr = *pp; if (!isWideChar_) { - c = *(cptr - 1); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - cptr -= 1; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + c = cptr[-1]; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + cptr -= 1; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) *pp = cptr; } else { cptr -= WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) uint16_t c1 = *reinterpret_cast(cptr); c = c1; if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - c1 = (reinterpret_cast(cptr))[-1]; + c1 = reinterpret_cast(cptr)[-1]; if (U16_IS_LEAD(c1)) { - c = U16_GET_SUPPLEMENTARY(c1, c); // NOLINT(hicpp-signed-bitwise) - cptr -= WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + c = static_cast(U16_GET_SUPPLEMENTARY(c1, c)); // NOLINT(hicpp-signed-bitwise) + cptr -= WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) } } *pp = cptr; @@ -218,11 +568,10 @@ public: *pp = cptr; } else { cptr -= WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) uint16_t c1 = *reinterpret_cast(cptr); if (U16_IS_TRAIL(c1) && IsUtf16() && cptr > start) { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - c1 = (reinterpret_cast(cptr))[-1]; + c1 = reinterpret_cast(cptr)[-1]; if (U16_IS_LEAD(c1)) { cptr -= WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) } @@ -307,8 +656,8 @@ public: RegExpState *PeekRegExpState() const { ASSERT(stateStackLen_ >= 1); - return reinterpret_cast(stateStack_ + // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - (stateStackLen_ - 1) * stateSize_); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return reinterpret_cast(stateStack_ + (stateStackLen_ - 1) * stateSize_); } void ReAllocStack(uint32_t stackLen); diff --git a/runtime/regexp/regexp_opcode.cpp b/runtime/regexp/regexp_opcode.cpp index a25a6dbf538006bfdb568b77f6367303f040af96..767a92ede0d625ac69d4f4066e16b9ac5bcd9a72 100644 --- a/runtime/regexp/regexp_opcode.cpp +++ b/runtime/regexp/regexp_opcode.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Huawei Device Co., Ltd. + * Copyright (c) 2021 Huawei Device Co., Ltd. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -20,64 +20,40 @@ namespace panda::ecmascript { using CaptureState = RegExpExecutor::CaptureState; -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static SaveStartOpCode g_saveStartOpcode = SaveStartOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static SaveEndOpCode g_saveEndOpcode = SaveEndOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static CharOpCode g_charOpcode = CharOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static GotoOpCode g_gotoOpcode = GotoOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static SplitNextOpCode g_splitNextOpcode = SplitNextOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static SplitFirstOpCode g_splitFirstOpcode = SplitFirstOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static MatchOpCode g_matchOpcode = MatchOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static LoopOpCode g_loopOpcode = LoopOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static LoopGreedyOpCode g_loopGreedyOpcode = LoopGreedyOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static PushCharOpCode g_pushCharOpcode = PushCharOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static CheckCharOpCode g_checkCharOpcode = CheckCharOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static PushOpCode g_pushOpcode = PushOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static PopOpCode g_popOpcode = PopOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static SaveResetOpCode g_saveResetOpcode = SaveResetOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static LineStartOpCode g_lineStartOpcode = LineStartOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static LineEndOpCode g_lineEndOpcode = LineEndOpCode(); +static SaveStartOpCode g_saveStartOpcode = SaveStartOpCode(); // NOLINT(fuchsia-statically-constructed-objects) +static SaveEndOpCode g_saveEndOpcode = SaveEndOpCode(); // NOLINT(fuchsia-statically-constructed-objects) +static CharOpCode g_charOpcode = CharOpCode(); // NOLINT(fuchsia-statically-constructed-objects) +static GotoOpCode g_gotoOpcode = GotoOpCode(); // NOLINT(fuchsia-statically-constructed-objects) +static SplitNextOpCode g_splitNextOpcode = SplitNextOpCode(); // NOLINT(fuchsia-statically-constructed-objects) +static SplitFirstOpCode g_splitFirstOpcode = SplitFirstOpCode(); // NOLINT(fuchsia-statically-constructed-objects) +static MatchOpCode g_matchOpcode = MatchOpCode(); // NOLINT(fuchsia-statically-constructed-objects) +static LoopOpCode g_loopOpcode = LoopOpCode(); // NOLINT(fuchsia-statically-constructed-objects) +static LoopGreedyOpCode g_loopGreedyOpcode = LoopGreedyOpCode(); // NOLINT(fuchsia-statically-constructed-objects) +static PushCharOpCode g_pushCharOpcode = PushCharOpCode(); // NOLINT(fuchsia-statically-constructed-objects) +static CheckCharOpCode g_checkCharOpcode = CheckCharOpCode(); // NOLINT(fuchsia-statically-constructed-objects) +static PushOpCode g_pushOpcode = PushOpCode(); // NOLINT(fuchsia-statically-constructed-objects) +static PopOpCode g_popOpcode = PopOpCode(); // NOLINT(fuchsia-statically-constructed-objects) +static SaveResetOpCode g_saveResetOpcode = SaveResetOpCode(); // NOLINT(fuchsia-statically-constructed-objects) +static LineStartOpCode g_lineStartOpcode = LineStartOpCode(); // NOLINT(fuchsia-statically-constructed-objects) +static LineEndOpCode g_lineEndOpcode = LineEndOpCode(); // NOLINT(fuchsia-statically-constructed-objects) // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) static WordBoundaryOpCode g_wordBoundaryOpcode = WordBoundaryOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) static NotWordBoundaryOpCode g_notWordBoundaryOpcode = NotWordBoundaryOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static AllOpCode g_allOpcode = AllOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static DotsOpCode g_dotsOpcode = DotsOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static MatchAheadOpCode g_matchAheadOpcode = MatchAheadOpCode(); +static AllOpCode g_allOpcode = AllOpCode(); // NOLINT(fuchsia-statically-constructed-objects) +static DotsOpCode g_dotsOpcode = DotsOpCode(); // NOLINT(fuchsia-statically-constructed-objects) +static MatchAheadOpCode g_matchAheadOpcode = MatchAheadOpCode(); // NOLINT(fuchsia-statically-constructed-objects) // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) static NegativeMatchAheadOpCode g_negativeMatchAheadOpcode = NegativeMatchAheadOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static MatchEndOpCode g_matchEndOpcode = MatchEndOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static PrevOpCode g_prevOpcode = PrevOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static RangeOpCode g_rangeOpcode = RangeOpCode(); +static MatchEndOpCode g_matchEndOpcode = MatchEndOpCode(); // NOLINT(fuchsia-statically-constructed-objects) +static PrevOpCode g_prevOpcode = PrevOpCode(); // NOLINT(fuchsia-statically-constructed-objects) +static RangeOpCode g_rangeOpcode = RangeOpCode(); // NOLINT(fuchsia-statically-constructed-objects) // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) static BackReferenceOpCode g_backreferenceOpcode = BackReferenceOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) static BackwardBackReferenceOpCode g_backwardBackreferenceOpcode = BackwardBackReferenceOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static Char32OpCode g_char32Opcode = Char32OpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static Range32OpCode g_range32Opcode = Range32OpCode(); +static Char32OpCode g_char32Opcode = Char32OpCode(); // NOLINT(fuchsia-statically-constructed-objects) +static Range32OpCode g_range32Opcode = Range32OpCode(); // NOLINT(fuchsia-statically-constructed-objects) // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) static std::vector g_intrinsicSet = { &g_saveStartOpcode, @@ -473,8 +449,8 @@ uint32_t RangeOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_ { out << offset << ":\t" << "range\t"; - int size = buf.GetU16(offset + 1); - for (int i = 0; i < size; i++) { + size_t size = buf.GetU16(offset + 1); + for (size_t i = 0; i < size; i++) { out << buf.GetU16(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_FOUR)) << "\t" << buf.GetU16(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_FOUR + RegExpOpCode::OP_SIZE_TWO)) @@ -500,8 +476,8 @@ uint32_t Range32OpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint3 { out << offset << ":\t" << "range32\t"; - int size = buf.GetU16(offset + 1); - for (int i = 0; i < size; i++) { + size_t size = buf.GetU16(offset + 1); + for (size_t i = 0; i < size; i++) { out << buf.GetU32(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_EIGHT)) << "\t" << buf.GetU32(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_EIGHT + RegExpOpCode::OP_SIZE_FOUR)) diff --git a/runtime/regexp/regexp_opcode.h b/runtime/regexp/regexp_opcode.h index 6cf830d096d6b676249cf30c8680daa8b149b934..f5048e1bab2138cf41ad0228a3ad37ce7c8d64fa 100644 --- a/runtime/regexp/regexp_opcode.h +++ b/runtime/regexp/regexp_opcode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Huawei Device Co., Ltd. + * Copyright (c) 2021 Huawei Device Co., Ltd. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -18,7 +18,7 @@ #include -#include "plugins/ecmascript/runtime/regexp/dyn_chunk.h" +#include "plugins/ecmascript/runtime/mem/dyn_chunk.h" namespace panda::ecmascript { class RegExpOpCode { @@ -73,7 +73,7 @@ public: static RegExpOpCode *GetRegExpOpCode(const DynChunk &buf, int pcOffset); static RegExpOpCode *GetRegExpOpCode(uint8_t opCode); static void DumpRegExpOpCode(std::ostream &out, const DynChunk &buf); - inline int GetSize() const + inline uint8_t GetSize() const { return size_; } @@ -343,7 +343,6 @@ public: } return false; } - inline uint32_t HighestValue() const { if (!rangeSet_.empty()) { @@ -351,7 +350,6 @@ public: } return 0; } - RangeSet(RangeSet const &) = default; RangeSet &operator=(RangeSet const &) = default; RangeSet(RangeSet &&) = default; diff --git a/runtime/regexp/regexp_parser.cpp b/runtime/regexp/regexp_parser.cpp index d08eafac492c53d917a42e096b346a2e314b0bad..4e7f906a1c653b2a0eef99360db79c9eddb88dd9 100644 --- a/runtime/regexp/regexp_parser.cpp +++ b/runtime/regexp/regexp_parser.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Huawei Device Co., Ltd. + * Copyright (c) 2021 Huawei Device Co., Ltd. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -20,13 +20,19 @@ #include "plugins/ecmascript/runtime/regexp/regexp_opcode.h" #include "libpandabase/utils/utils.h" #include "securec.h" +#include "unicode/uchar.h" #include "unicode/uniset.h" #define _NO_DEBUG_ namespace panda::ecmascript { -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static RangeSet g_rangeD(0x30, 0x39); // NOLINT(readability-magic-numbers) +static constexpr uint32_t CACHE_SIZE = 128; +static constexpr uint32_t CHAR_MAXS = 128; +// NOLINTNEXTLINE(modernize-avoid-c-arrays) +static constexpr uint32_t ID_START_TABLE_ASCII[4] = { + /* $ A-Z _ a-z */ + 0x00000000, 0x00000010, 0x87FFFFFE, 0x07FFFFFE}; +static RangeSet g_rangeD(0x30, 0x39); // NOLINT(fuchsia-statically-constructed-objects, readability-magic-numbers) // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) static RangeSet g_rangeS({ std::pair(0x0009, 0x000D), // NOLINT(readability-magic-numbers) @@ -146,21 +152,21 @@ uint32_t RegExpParser::ParseOctalLiteral() bool RegExpParser::ParseUnlimitedLengthHexNumber(uint32_t maxValue, uint32_t *value) { uint32_t x = 0; - int d = HexValue(c0_); + int d = static_cast(HexValue(c0_)); if (d < 0) { return false; } while (d >= 0) { - if (UNLIKELY(x > (std::numeric_limits::max() - d) / HEX_VALUE)) { + if (UNLIKELY(x > (std::numeric_limits::max() - static_cast(d)) / HEX_VALUE)) { LOG_ECMA(FATAL) << "value overflow"; return false; } - x = x * HEX_VALUE + d; + x = x * HEX_VALUE + static_cast(d); if (x > maxValue) { return false; } Advance(); - d = HexValue(c0_); + d = static_cast(HexValue(c0_)); } *value = x; return true; @@ -210,13 +216,13 @@ bool RegExpParser::ParseHexEscape(int length, uint32_t *value) uint32_t val = 0; for (int i = 0; i < length; ++i) { uint32_t c = c0_; - int d = HexValue(c); + int d = static_cast(HexValue(c)); if (d < 0) { pc_ = start; Advance(); return false; } - val = val * HEX_VALUE + d; + val = val * HEX_VALUE + static_cast(d); Advance(); } *value = val; @@ -241,15 +247,17 @@ void RegExpParser::ParseAlternative(bool isBackward) LineStartOpCode lineStartOp; lineStartOp.EmitOpCode(&buffer_, 0); Advance(); - } break; + break; + } case '$': { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) PrintF("Assertion %c line end \n", c0_); LineEndOpCode lineEndOp; lineEndOp.EmitOpCode(&buffer_, 0); Advance(); - } break; - case '\\': + break; + } + case '\\': { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) PrintF("Escape %c \n", c0_); Advance(); @@ -260,15 +268,17 @@ void RegExpParser::ParseAlternative(bool isBackward) WordBoundaryOpCode wordBoundaryOp; wordBoundaryOp.EmitOpCode(&buffer_, 0); Advance(); - } break; + break; + } case 'B': { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) PrintF("Assertion %c \n", c0_); NotWordBoundaryOpCode notWordBoundaryOp; notWordBoundaryOp.EmitOpCode(&buffer_, 0); Advance(); - } break; - default: + break; + } + default: { isAtom = true; int atomValue = ParseAtomEscape(isBackward); if (atomValue != -1) { @@ -300,14 +310,17 @@ void RegExpParser::ParseAlternative(bool isBackward) } } break; + } } break; + } case '(': { Advance(); isAtom = ParseAssertionCapture(&captureIndex, isBackward); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) Advance(); - } break; + break; + } case '.': { PrevOpCode prevOp; if (isBackward) { @@ -327,7 +340,8 @@ void RegExpParser::ParseAlternative(bool isBackward) PrintF("Atom %c match any \n", c0_); isAtom = true; Advance(); - } break; + break; + } case '[': { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) PrintF("Atom %c match range \n", c0_); @@ -361,7 +375,8 @@ void RegExpParser::ParseAlternative(bool isBackward) if (isBackward) { prevOp.EmitOpCode(&buffer_, 0); } - } break; + break; + } case '*': case '+': case '?': @@ -386,7 +401,7 @@ void RegExpParser::ParseAlternative(bool isBackward) return; } [[fallthrough]]; - default: + default: { // PatternCharacter // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) PrintF("PatternCharacter %c\n", c0_); @@ -404,11 +419,11 @@ void RegExpParser::ParseAlternative(bool isBackward) int32_t length = end_ - pc_ + 1; // NOLINTNEXTLINE(hicpp-signed-bitwise) U8_NEXT(pc_, i, length, c); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - matchedChar = c; + matchedChar = static_cast(c); pc_ += i; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) } if (IsIgnoreCase()) { - matchedChar = Canonicalize(matchedChar, IsUtf16()); + matchedChar = static_cast(Canonicalize(static_cast(matchedChar), IsUtf16())); } if (matchedChar > UINT16_MAX) { Char32OpCode charOp; @@ -423,6 +438,7 @@ void RegExpParser::ParseAlternative(bool isBackward) } Advance(); break; + } } if (isAtom && !isError_) { ParseQuantifier(atomBcStart, captureIndex, captureCount_ - 1); @@ -432,11 +448,8 @@ void RegExpParser::ParseAlternative(bool isBackward) size_t termSize = end - atomBcStart; size_t moveSize = end - start; buffer_.Expand(end + termSize); - if (memmove_s(buffer_.buf_ + start + // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - termSize, // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - moveSize, - buffer_.buf_ + start, // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - moveSize) != EOK) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + if (memmove_s(buffer_.buf_ + start + termSize, moveSize, buffer_.buf_ + start, moveSize) != EOK) { LOG_ECMA(FATAL) << "memmove_s failed"; UNREACHABLE(); } @@ -487,7 +500,8 @@ bool RegExpParser::ParseAssertionCapture(int *captureIndex, bool isBackward) MatchAheadOpCode matchAheadOp; uint32_t len = buffer_.size_ - start; matchAheadOp.InsertOpCode(&buffer_, start, len); - } break; + break; + } // (?!Disjunction[?U, ?N]) case '!': { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) @@ -500,8 +514,9 @@ bool RegExpParser::ParseAssertionCapture(int *captureIndex, bool isBackward) NegativeMatchAheadOpCode matchAheadOp; uint32_t len = buffer_.size_ - start; matchAheadOp.InsertOpCode(&buffer_, start, len); - } break; - case '<': + break; + } + case '<': { Advance(); // (?<=Disjunction[?U, ?N]) if (c0_ == '=') { @@ -540,12 +555,14 @@ bool RegExpParser::ParseAssertionCapture(int *captureIndex, bool isBackward) return false; } groupNames_.EmitStr(name.c_str()); + newGroupNames_.push_back(name); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) PrintF("group name %s", name.c_str()); Advance(); goto parseCapture; // NOLINT(cppcoreguidelines-avoid-goto) } break; + } // (?:Disjunction[?U, ?N]) case ':': // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) @@ -594,7 +611,7 @@ int RegExpParser::ParseDecimalDigits() { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) PrintF("Parse DecimalDigits------\n"); - int result = 0; + uint32_t result = 0; bool overflow = false; while (true) { if (c0_ < '0' || c0_ > '9') { @@ -631,7 +648,7 @@ bool RegExpParser::ParserIntervalQuantifier(int *pmin, int *pmax) *pmin = ParseDecimalDigits(); *pmax = *pmin; switch (c0_) { - case ',': + case ',': { Advance(); if (c0_ == '}') { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) @@ -649,6 +666,7 @@ bool RegExpParser::ParserIntervalQuantifier(int *pmin, int *pmax) } } break; + } case '}': // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) PrintF("QuantifierPrefix{DecimalDigits}\n"); @@ -699,7 +717,8 @@ void RegExpParser::ParseQuantifier(size_t atomBcStart, int captureStart, int cap ParseError("Invalid repetition count"); return; } - } break; + break; + } default: break; } @@ -759,32 +778,61 @@ void RegExpParser::ParseQuantifier(size_t atomBcStart, int captureStart, int cap bool RegExpParser::ParseGroupSpecifier(const uint8_t **pp, PandaString &name) { const uint8_t *p = *pp; - int c = *p; - while (c != '>') { - if (c < (INT8_MAX + 1)) { - if (name.empty()) { - if (!g_regexpIdentifyStart.IsContain(c)) { - return false; - } - } else { - if (!g_regexpIdentifyContinue.IsContain(c)) { - return false; - } + uint32_t c; + std::array buffer {}; + char *q = buffer.data(); + while (true) { + if (p <= end_) { + c = *p; + } else { + c = KEY_EOF; + } + if (c == '\\') { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + p++; + if (*p != 'u') { + return false; + } + if (!ParseUnicodeEscape(&c)) { + return false; } - name += static_cast(c); + } else if (c == '>') { + break; + } else if (c > CACHE_SIZE && c != KEY_EOF) { + c = static_cast(base::StringHelper::UnicodeFromUtf8(p, UTF8_CHAR_LEN_MAX, &p)); + } else if (c != KEY_EOF) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + p++; + } else { + return false; + } + if (q == buffer.data()) { + if (IsIdentFirst(c) != 0) { + return false; + } + } else { + if (!u_isIDPart(c)) { + return false; + } + } + if (q != nullptr) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + *q++ = c; } - c = *++p; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) } - p++; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + p++; *pp = p; + name = buffer.data(); return true; } int RegExpParser::ParseCaptureCount(const char *groupName) { - const uint8_t *p; + const uint8_t *p = nullptr; int captureIndex = 1; PandaString name; + hasNamedCaptures_ = 0; for (p = base_; p < end_; p++) { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) switch (*p) { case '(': { @@ -793,6 +841,7 @@ int RegExpParser::ParseCaptureCount(const char *groupName) if (p[CAPTURE_CONUT_ADVANCE - 1] == '<' && p[CAPTURE_CONUT_ADVANCE] != '!' && // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) p[CAPTURE_CONUT_ADVANCE] != '=') { + hasNamedCaptures_ = 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) p += CAPTURE_CONUT_ADVANCE; if (groupName != nullptr) { @@ -807,11 +856,12 @@ int RegExpParser::ParseCaptureCount(const char *groupName) } else { captureIndex++; } - } break; + break; + } case '\\': p++; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) break; - case '[': + case '[': { while (p < end_ && *p != ']') { if (*p == '\\') { p++; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) @@ -819,6 +869,7 @@ int RegExpParser::ParseCaptureCount(const char *groupName) p++; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) } break; + } default: break; } @@ -837,6 +888,7 @@ int RegExpParser::ParseAtomEscape(bool isBackward) int result = -1; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) PrintF("Parse AtomEscape------\n"); + PrevOpCode prevOp; switch (c0_) { case KEY_EOF: // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) @@ -866,55 +918,121 @@ int RegExpParser::ParseAtomEscape(bool isBackward) BackReferenceOpCode backReferenceOp; backReferenceOp.EmitOpCode(&buffer_, capture); } - } break; + break; + } // CharacterClassEscape case 'd': { // [0-9] RangeOpCode rangeOp; + if (isBackward) { + prevOp.EmitOpCode(&buffer_, 0); + } rangeOp.InsertOpCode(&buffer_, g_rangeD); - Advance(); - } break; + goto parseLookBehind; // NOLINT(cppcoreguidelines-avoid-goto) + break; + } case 'D': { // [^0-9] RangeSet atomRange(g_rangeD); atomRange.Invert(IsUtf16()); Range32OpCode rangeOp; + if (isBackward) { + prevOp.EmitOpCode(&buffer_, 0); + } rangeOp.InsertOpCode(&buffer_, atomRange); - Advance(); - } break; + goto parseLookBehind; // NOLINT(cppcoreguidelines-avoid-goto) + break; + } case 's': { // [\f\n\r\t\v] RangeOpCode rangeOp; + if (isBackward) { + prevOp.EmitOpCode(&buffer_, 0); + } rangeOp.InsertOpCode(&buffer_, g_rangeS); - Advance(); - } break; + goto parseLookBehind; // NOLINT(cppcoreguidelines-avoid-goto) + break; + } case 'S': { RangeSet atomRange(g_rangeS); - atomRange.Invert(IsUtf16()); Range32OpCode rangeOp; + atomRange.Invert(IsUtf16()); + if (isBackward) { + prevOp.EmitOpCode(&buffer_, 0); + } rangeOp.InsertOpCode(&buffer_, atomRange); - Advance(); - } break; + goto parseLookBehind; // NOLINT(cppcoreguidelines-avoid-goto) + break; + } case 'w': { // [A-Za-z0-9] RangeOpCode rangeOp; + if (isBackward) { + prevOp.EmitOpCode(&buffer_, 0); + } rangeOp.InsertOpCode(&buffer_, g_rangeW); - Advance(); - } break; + goto parseLookBehind; // NOLINT(cppcoreguidelines-avoid-goto) + break; + } case 'W': { // [^A-Za-z0-9] RangeSet atomRange(g_rangeW); atomRange.Invert(IsUtf16()); Range32OpCode rangeOp; + if (isBackward) { + prevOp.EmitOpCode(&buffer_, 0); + } rangeOp.InsertOpCode(&buffer_, atomRange); - Advance(); - } break; + goto parseLookBehind; // NOLINT(cppcoreguidelines-avoid-goto) + break; + } // P{UnicodePropertyValueExpression} // p{UnicodePropertyValueExpression} case 'P': case 'p': // [+N]kGroupName[?U] - case 'k': + case 'k': { + Advance(); + if (c0_ != '<') { + if (!IsUtf16() || HasNamedCaptures()) { + ParseError("expecting group name."); + break; + } + } + Advance(); + Prev(); + PandaString name; + auto **pp = const_cast(&pc_); + if (!ParseGroupSpecifier(pp, name)) { + ParseError("GroupName Syntax error."); + break; + } + int postion = FindGroupName(name); + if (postion < 0) { + postion = ParseCaptureCount(name.c_str()); + if (postion < 0 && (!IsUtf16() || HasNamedCaptures())) { + ParseError("group name not defined"); + break; + } + } + if (isBackward) { + BackwardBackReferenceOpCode backReferenceOp; + backReferenceOp.EmitOpCode(&buffer_, postion); + } else { + BackReferenceOpCode backReferenceOp; + backReferenceOp.EmitOpCode(&buffer_, postion); + } + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) + Advance(); + break; + } + parseLookBehind : { + if (isBackward) { + prevOp.EmitOpCode(&buffer_, 0); + } + Advance(); + break; + } default: result = ParseCharacterEscape(); break; @@ -922,6 +1040,22 @@ int RegExpParser::ParseAtomEscape(bool isBackward) return result; } +int RegExpParser::RecountCaptures() +{ + if (totalCaptureCount_ < 0) { + const char *name = reinterpret_cast(groupNames_.buf_); + totalCaptureCount_ = ParseCaptureCount(name); + } + return totalCaptureCount_; +} +bool RegExpParser::HasNamedCaptures() +{ + if (hasNamedCaptures_ < 0) { + RecountCaptures(); + } + return false; +} + int RegExpParser::ParseCharacterEscape() { // CharacterEscape[U]:: @@ -966,12 +1100,12 @@ int RegExpParser::ParseCharacterEscape() Advance(); break; // c ControlLetter - case 'c': + case 'c': { Advance(); if ((c0_ >= 'A' && c0_ <= 'Z') || (c0_ >= 'a' && c0_ <= 'z')) { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) PrintF("ControlLetter %c\n", c0_); - result = static_cast(c0_) & 0x1fU; // NOLINT(readability-magic-numbers) + result = static_cast(c0_) & 0x1f; // NOLINT(readability-magic-numbers, hicpp-signed-bitwise) Advance(); } else { if (!IsUtf16()) { @@ -983,7 +1117,8 @@ int RegExpParser::ParseCharacterEscape() } } break; - case '0': + } + case '0': { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) PrintF("CharacterEscape 0 [lookahead ∉ DecimalDigit]\n"); if (IsUtf16() && !(*pc_ >= '0' && *pc_ <= '9')) { // NOLINT(readability-magic-numbers) @@ -992,13 +1127,14 @@ int RegExpParser::ParseCharacterEscape() break; } [[fallthrough]]; + } case '1': case '2': case '3': case '4': case '5': case '6': - case '7': + case '7': { if (IsUtf16()) { // With /u, decimal escape is not interpreted as octal character code. ParseError("Invalid class escape"); @@ -1006,6 +1142,7 @@ int RegExpParser::ParseCharacterEscape() } result = ParseOctalLiteral(); break; + } // ParseHexEscapeSequence // ParseRegExpUnicodeEscapeSequence case 'x': { @@ -1033,7 +1170,8 @@ int RegExpParser::ParseCharacterEscape() // If \u is not followed by a two-digit hexadecimal, treat it // as an identity escape. result = 'u'; - } break; + break; + } // IdentityEscape[?U] case '$': case '(': @@ -1055,7 +1193,7 @@ int RegExpParser::ParseCharacterEscape() result = c0_; Advance(); break; - default: + default: { if (IsUtf16()) { ParseError("Invalid unicode escape"); return 0; @@ -1063,8 +1201,11 @@ int RegExpParser::ParseCharacterEscape() // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) PrintF("SourceCharacter %c\n", c0_); result = c0_; - Advance(); + if (result < CHAR_MAXS) { + Advance(); + } break; + } } return result; } @@ -1106,14 +1247,15 @@ bool RegExpParser::ParseClassRanges(RangeSet *result) result->Insert(s2); continue; } - - if (c1 > c2) { - ParseError("invalid class range"); - return false; + if (c1 < INT8_MAX) { + if (c1 > c2) { + ParseError("invalid class range"); + return false; + } } if (IsIgnoreCase()) { - c1 = Canonicalize(c1, IsUtf16()); - c2 = Canonicalize(c2, IsUtf16()); + c1 = static_cast(Canonicalize(c1, IsUtf16())); + c2 = static_cast(Canonicalize(c2, IsUtf16())); } result->Insert(c1, c2); @@ -1131,20 +1273,22 @@ uint32_t RegExpParser::ParseClassAtom(RangeSet *atom) switch (c0_) { case '\\': { Advance(); - ret = ParseClassEscape(atom); - } break; + ret = static_cast(ParseClassEscape(atom)); + break; + } case KEY_EOF: break; - case 0: + case 0: { if (pc_ >= end_) { return UINT32_MAX; } [[fallthrough]]; - default: + } + default: { uint32_t value = c0_; - int u16_size = 0; - if (c0_ > INT8_MAX) { // NOLINT(readability-magic-numbers) - pc_ -= 1; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + size_t u16_size = 0; + if (c0_ > INT8_MAX) { + pc_ -= 1; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) auto u16_result = base::utf_helper::ConvertUtf8ToUtf16Pair(pc_, true); value = u16_result.first; u16_size = u16_result.second; @@ -1153,11 +1297,12 @@ uint32_t RegExpParser::ParseClassAtom(RangeSet *atom) Advance(); } if (IsIgnoreCase()) { - value = Canonicalize(value, IsUtf16()); + value = static_cast(Canonicalize(value, IsUtf16())); } atom->Insert(RangeSet(value)); ret = value; break; + } } return ret; } @@ -1216,14 +1361,30 @@ int RegExpParser::ParseClassEscape(RangeSet *atom) // p{UnicodePropertyValueExpression} case 'P': case 'p': + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) + PrintF("Warning: \\p is not supported in ECMA 2015!"); Advance(); if (c0_ == '{') { Advance(); + if (c0_ == '}') { + break; // p{}, invalid + } bool isValue = false; ParseUnicodePropertyValueCharacters(&isValue); - if (!isValue) { + if (!isValue && c0_ == '=') { + // UnicodePropertyName = UnicodePropertyValue + Advance(); + if (c0_ == '}') { + break; // p{xxx=}, invalid + } ParseUnicodePropertyValueCharacters(&isValue); } + if (c0_ != '}') { + break; // p{xxx, invalid + } + // should do atom->Invert() here after ECMA 9.0 + Advance(); + result = CLASS_RANGE_BASE; } break; default: @@ -1243,23 +1404,17 @@ void RegExpParser::ParseUnicodePropertyValueCharacters(bool *isValue) if ((c0_ >= 'A' && c0_ <= 'Z') || (c0_ >= 'a' && c0_ <= 'z')) { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) PrintF("UnicodePropertyCharacter::ControlLetter %c\n", c0_); - Advance(); - } else if (c0_ == '-') { + } else if (c0_ == '_') { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("UnicodePropertyCharacter:: - \n"); - Advance(); + PrintF("UnicodePropertyCharacter:: _ \n"); } else if (c0_ >= '0' && c0_ <= '9') { *isValue = true; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) PrintF("UnicodePropertyValueCharacter::DecimalDigit %c\n", c0_); - Advance(); - } else if (*isValue && c0_ == '}') { - Advance(); - return; - } else if (!*isValue && c0_ == '=') { - Advance(); + } else { return; } + Advance(); ParseUnicodePropertyValueCharacters(isValue); } @@ -1292,4 +1447,13 @@ void RegExpParser::ParseError(const char *errorMessage) UNREACHABLE(); } } -} // namespace panda::ecmascript + +int RegExpParser::IsIdentFirst(uint32_t c) +{ + if (c < CACHE_SIZE) { + // NOLINTNEXTLINE(hicpp-signed-bitwise + return (ID_START_TABLE_ASCII[c >> 5] >> (c & 31)) & 1; // 5: Shift five bits 31: and operation binary of 31 + } + return static_cast(u_isIDStart(c)); +} +} // namespace panda::ecmascript \ No newline at end of file diff --git a/runtime/regexp/regexp_parser.h b/runtime/regexp/regexp_parser.h index dda9977e2e01f2b338ff1ec15f764f6ce3651d64..db15e6c942efaaa03d3ecaa002433f87038e05dc 100644 --- a/runtime/regexp/regexp_parser.h +++ b/runtime/regexp/regexp_parser.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Huawei Device Co., Ltd. + * Copyright (c) 2021 Huawei Device Co., Ltd. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -19,13 +19,14 @@ #include #include #include -#include "plugins/ecmascript/runtime/regexp/dyn_chunk.h" +#include "plugins/ecmascript/runtime/mem/dyn_chunk.h" #include "plugins/ecmascript/runtime/regexp/regexp_opcode.h" #include "unicode/stringpiece.h" #include "unicode/uchar.h" #include "unicode/utf16.h" #include "unicode/utf8.h" #include "unicode/utypes.h" +#include "unicode/udata.h" namespace panda::ecmascript { class RegExpParser { @@ -36,7 +37,7 @@ public: static constexpr auto FLAG_DOTALL = (1U << 3U); static constexpr auto FLAG_UTF16 = (1U << 4U); static constexpr auto FLAG_STICKY = (1U << 5U); - static const int KEY_EOF = -1; + static const uint32_t KEY_EOF = UINT32_MAX; static constexpr int CLASS_RANGE_BASE = 0x40000000; static constexpr uint32_t NUM_CAPTURE__OFFSET = 4; static constexpr uint32_t NUM_STACK_OFFSET = 8; @@ -49,8 +50,9 @@ public: static constexpr uint32_t UNICODE_HEX_VALUE = 4; static constexpr uint32_t UNICODE_HEX_ADVANCE = 2; static constexpr uint32_t CAPTURE_CONUT_ADVANCE = 3; + static constexpr uint32_t UTF8_CHAR_LEN_MAX = 6; - RegExpParser() = default; + explicit RegExpParser() = default; ~RegExpParser() { @@ -91,6 +93,20 @@ public: bool ParseUnlimitedLengthHexNumber(uint32_t maxValue, uint32_t *value); bool ParseUnicodeEscape(uint32_t *value); bool ParserIntervalQuantifier(int *pmin, int *pmax); + bool HasNamedCaptures(); + int ParseEscape(const uint8_t **pp, int isUtf16); + int RecountCaptures(); + int IsIdentFirst(uint32_t c); + + inline PandaVector GetGroupNames() const + { + return newGroupNames_; + } + + inline size_t GetGroupNamesSize() const + { + return groupNames_.size_; + } inline bool IsError() const { @@ -208,13 +224,16 @@ private: uint8_t *pc_ {nullptr}; uint8_t *end_ {nullptr}; uint32_t flags_ {0}; - int c0_ {KEY_EOF}; + uint32_t c0_ {KEY_EOF}; int captureCount_ {0}; int stackCount_ {0}; bool isError_ {false}; char errorMsg_[TMP_BUF_SIZE] = {0}; // NOLINT(modernize-avoid-c-arrays) - DynChunk buffer_; - DynChunk groupNames_; + int hasNamedCaptures_ = -1; + int totalCaptureCount_ = -1; + DynChunk buffer_ {}; + DynChunk groupNames_ {}; + PandaVector newGroupNames_ {}; }; } // namespace panda::ecmascript #endif // ECMASCRIPT_REGEXP_PARSER_H diff --git a/runtime/regexp/regexp_parser_cache.cpp b/runtime/regexp/regexp_parser_cache.cpp index 3ee4d2e33f9f47d6e7b913cf492dff52c626e6b6..47f38eb54c86755a416cb74093527c244cb292f6 100644 --- a/runtime/regexp/regexp_parser_cache.cpp +++ b/runtime/regexp/regexp_parser_cache.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Huawei Device Co., Ltd. + * Copyright (c) 2021 Huawei Device Co., Ltd. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -41,18 +41,20 @@ size_t RegExpParserCache::GetHash(EcmaString *pattern, const uint32_t flags) return (pattern->GetHashcode() ^ flags) % CACHE_SIZE; } -std::pair RegExpParserCache::GetCache(EcmaString *pattern, const uint32_t flags) +std::pair RegExpParserCache::GetCache(EcmaString *pattern, const uint32_t flags, + PandaVector &groupName) { size_t hash = GetHash(pattern, flags); ParserKey &info = info_[hash]; if (info.flags_ != flags || !EcmaString::StringsAreEqual(info.pattern_, pattern)) { return std::pair(JSTaggedValue::Hole(), 0); } + groupName = info.newGroupNames_; return std::pair(info.codeBuffer_, info.bufferSize_); } void RegExpParserCache::SetCache(EcmaString *pattern, const uint32_t flags, const JSTaggedValue codeBuffer, - const size_t bufferSize) + const size_t bufferSize, PandaVector &&groupName) { size_t hash = GetHash(pattern, flags); ParserKey &info = info_[hash]; @@ -60,5 +62,6 @@ void RegExpParserCache::SetCache(EcmaString *pattern, const uint32_t flags, cons info.flags_ = flags; info.codeBuffer_ = codeBuffer; info.bufferSize_ = bufferSize; + info.newGroupNames_ = groupName; } -} // namespace panda::ecmascript +} // namespace panda::ecmascript \ No newline at end of file diff --git a/runtime/regexp/regexp_parser_cache.h b/runtime/regexp/regexp_parser_cache.h index 874f727ae829437e7186254c39468ee78d5f96e0..b83e94a54215821fef28f4772a212785b3d04c14 100644 --- a/runtime/regexp/regexp_parser_cache.h +++ b/runtime/regexp/regexp_parser_cache.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Huawei Device Co., Ltd. + * Copyright (c) 2021 Huawei Device Co., Ltd. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -26,16 +26,17 @@ class RegExpParserCache { public: RegExpParserCache(); ~RegExpParserCache(); + NO_COPY_SEMANTIC(RegExpParserCache); + NO_MOVE_SEMANTIC(RegExpParserCache); + static constexpr size_t CACHE_SIZE = 128; bool IsInCache(EcmaString *pattern, uint32_t flags); - std::pair GetCache(EcmaString *pattern, uint32_t flags); - void SetCache(EcmaString *pattern, uint32_t flags, JSTaggedValue codeBuffer, size_t bufferSize); + std::pair GetCache(EcmaString *pattern, uint32_t flags, PandaVector &groupName); + void SetCache(EcmaString *pattern, uint32_t flags, JSTaggedValue codeBuffer, size_t bufferSize, + PandaVector &&groupName); void Clear(); - DEFAULT_MOVE_SEMANTIC(RegExpParserCache); - DEFAULT_COPY_SEMANTIC(RegExpParserCache); - private: size_t GetHash(EcmaString *pattern, uint32_t flags); @@ -44,6 +45,7 @@ private: uint32_t flags_ {UINT32_MAX}; JSTaggedValue codeBuffer_ {JSTaggedValue::Hole()}; size_t bufferSize_ {0}; + PandaVector newGroupNames_; }; std::array info_ {}; diff --git a/runtime/runtime_call_id.h b/runtime/runtime_call_id.h index 9fd144983bfdea5349d27d9245b2782e08c7cef3..b9041b586eafbed65fd6300ba72377cc9a77b98c 100644 --- a/runtime/runtime_call_id.h +++ b/runtime/runtime_call_id.h @@ -465,6 +465,7 @@ namespace panda::ecmascript { V(RegExp, GetFlags) \ V(RegExp, GetSpecies) \ V(RegExp, Match) \ + V(RegExp, MatchAll) \ V(RegExp, Replace) \ V(RegExp, Search) \ V(RegExp, Split) \ @@ -494,6 +495,7 @@ namespace panda::ecmascript { V(String, LastIndexOf) \ V(String, LocaleCompare) \ V(String, Match) \ + V(String, MatchAll) \ V(String, Normalize) \ V(String, PadEnd) \ V(String, PadStart) \ diff --git a/runtime/runtime_sources.gn b/runtime/runtime_sources.gn index 1c21c24374de40cfdb4eb5e73542466b97d42dee..eb742b0e2c544868602990477ee6e3b9f7407a84 100644 --- a/runtime/runtime_sources.gn +++ b/runtime/runtime_sources.gn @@ -108,6 +108,7 @@ srcs = [ "js_primitive_ref.cpp", "js_promise.cpp", "js_proxy.cpp", + "js_regexp_iterator.cpp", "js_serializer.cpp", "js_set.cpp", "js_set_iterator.cpp", @@ -121,6 +122,7 @@ srcs = [ "linked_hash_table.cpp", "literal_data_extractor.cpp", "message_string.cpp", + "mem/dyn_chunk.cpp", "mem/ecma_reference_processor.cpp", "mem/ecma_string.cpp", "mem/mem_manager.cpp", @@ -129,7 +131,6 @@ srcs = [ "object_factory.cpp", "object_operator.cpp", "layout_info.cpp", - "regexp/dyn_chunk.cpp", "regexp/regexp_executor.cpp", "regexp/regexp_opcode.cpp", "regexp/regexp_parser.cpp", diff --git a/tests/runtime/builtins/builtins_regexp_test.cpp b/tests/runtime/builtins/builtins_regexp_test.cpp index 997ae260c356bdedbf35a4c713e420ec9e2062a1..96407f39dbd7e72f5055ce1709c66b7e4d19b0b2 100644 --- a/tests/runtime/builtins/builtins_regexp_test.cpp +++ b/tests/runtime/builtins/builtins_regexp_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Huawei Device Co., Ltd. + * Copyright (c) 2021 Huawei Device Co., Ltd. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -62,13 +62,13 @@ public: JSThread *thread {nullptr}; }; -JSTaggedValue CreateRegExpObjByPatternAndFlags(JSThread *thread, const JSHandle &pattern, - const JSHandle &flags) +JSTaggedValue CreateBuiltinsRegExpObjByPatternAndFlags(JSThread *thread, const JSHandle &pattern, + const JSHandle &flags) { JSHandle env = thread->GetEcmaVM()->GetGlobalEnv(); JSHandle regexp(env->GetRegExpFunction()); JSHandle globalObject(thread, env->GetGlobalObject()); - // make dyn_runtime_call_info + // make ecma_runtime_call_info // 8 : test case auto ecmaRuntimeCallInfo = TestHelper::CreateEcmaRuntimeCallInfo(thread, JSTaggedValue(*regexp), 8); ecmaRuntimeCallInfo->SetFunction(regexp.GetTaggedValue()); @@ -87,7 +87,7 @@ TEST_F(BuiltinsRegExpTest, RegExpConstructor1) // invoke RegExpConstructor method JSHandle pattern = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("\\w+"); JSHandle flags = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("i"); - JSTaggedValue result = CreateRegExpObjByPatternAndFlags(thread, pattern, flags); + JSTaggedValue result = CreateBuiltinsRegExpObjByPatternAndFlags(thread, pattern, flags); // ASSERT IsRegExp() JSHandle regexpObject(thread, result); @@ -106,7 +106,7 @@ TEST_F(BuiltinsRegExpTest, RegExpConstructor2) // invoke RegExpConstructor method JSHandle pattern = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("\\w+"); JSHandle flags = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("i"); - JSTaggedValue result1 = CreateRegExpObjByPatternAndFlags(thread, pattern, flags); + JSTaggedValue result1 = CreateBuiltinsRegExpObjByPatternAndFlags(thread, pattern, flags); JSHandle value(thread, reinterpret_cast(result1.GetRawData())); JSHandle env = thread->GetEcmaVM()->GetGlobalEnv(); @@ -122,6 +122,7 @@ TEST_F(BuiltinsRegExpTest, RegExpConstructor2) [[maybe_unused]] auto prev = TestHelper::SetupFrame(thread, ecmaRuntimeCallInfo.get()); // invoke RegExpConstructor method JSTaggedValue result2 = BuiltinsRegExp::RegExpConstructor(ecmaRuntimeCallInfo.get()); + TestHelper::TearDownFrame(thread, prev); // ASSERT IsRegExp() JSHandle regexpObject(thread, result2); @@ -140,7 +141,7 @@ TEST_F(BuiltinsRegExpTest, RegExpConstructor3) // invoke RegExpConstructor method JSHandle pattern1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("\\w+"); JSHandle flags1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("i"); - JSTaggedValue result1 = CreateRegExpObjByPatternAndFlags(thread, pattern1, flags1); + JSTaggedValue result1 = CreateBuiltinsRegExpObjByPatternAndFlags(thread, pattern1, flags1); JSHandle value(thread, reinterpret_cast(result1.GetRawData())); JSHandle env = thread->GetEcmaVM()->GetGlobalEnv(); @@ -175,7 +176,7 @@ TEST_F(BuiltinsRegExpTest, GetSource1) // invoke RegExpConstructor method JSHandle pattern1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString(""); JSHandle flags1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("i"); - JSTaggedValue result1 = CreateRegExpObjByPatternAndFlags(thread, pattern1, flags1); + JSTaggedValue result1 = CreateBuiltinsRegExpObjByPatternAndFlags(thread, pattern1, flags1); JSHandle result1Handle(thread, result1); // invoke GetSource method @@ -192,7 +193,7 @@ TEST_F(BuiltinsRegExpTest, GetSource2) // invoke RegExpConstructor method JSHandle pattern1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("/w+"); JSHandle flags1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("i"); - JSTaggedValue result1 = CreateRegExpObjByPatternAndFlags(thread, pattern1, flags1); + JSTaggedValue result1 = CreateBuiltinsRegExpObjByPatternAndFlags(thread, pattern1, flags1); JSHandle result1Handle(thread, result1); // invoke GetSource method @@ -208,7 +209,7 @@ TEST_F(BuiltinsRegExpTest, Get) // invoke RegExpConstructor method JSHandle pattern1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("\\w+"); JSHandle flags1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("gimuy"); - JSTaggedValue result1 = CreateRegExpObjByPatternAndFlags(thread, pattern1, flags1); + JSTaggedValue result1 = CreateBuiltinsRegExpObjByPatternAndFlags(thread, pattern1, flags1); JSHandle result1Handle(thread, result1); JSHandle global(thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("global")); @@ -242,7 +243,7 @@ TEST_F(BuiltinsRegExpTest, GetFlags) // invoke RegExpConstructor method JSHandle pattern1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("\\w+"); JSHandle flags1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("imuyg"); - JSTaggedValue result1 = CreateRegExpObjByPatternAndFlags(thread, pattern1, flags1); + JSTaggedValue result1 = CreateBuiltinsRegExpObjByPatternAndFlags(thread, pattern1, flags1); JSHandle result1Handle(thread, result1); // invoke GetFlags method @@ -258,7 +259,7 @@ TEST_F(BuiltinsRegExpTest, toString) // invoke RegExpConstructor method JSHandle pattern1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("\\w+"); JSHandle flags1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("imuyg"); - JSTaggedValue result1 = CreateRegExpObjByPatternAndFlags(thread, pattern1, flags1); + JSTaggedValue result1 = CreateBuiltinsRegExpObjByPatternAndFlags(thread, pattern1, flags1); JSHandle value(thread, reinterpret_cast(result1.GetRawData())); auto ecmaRuntimeCallInfo = TestHelper::CreateEcmaRuntimeCallInfo(thread, JSTaggedValue::Undefined(), 4); @@ -280,7 +281,7 @@ TEST_F(BuiltinsRegExpTest, Exec1) JSHandle pattern1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("quick\\s(brown).+?(jumps)"); JSHandle flags1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("ig"); - JSTaggedValue result1 = CreateRegExpObjByPatternAndFlags(thread, pattern1, flags1); + JSTaggedValue result1 = CreateBuiltinsRegExpObjByPatternAndFlags(thread, pattern1, flags1); JSHandle value(thread, reinterpret_cast(result1.GetRawData())); JSHandle inputString = @@ -303,7 +304,7 @@ TEST_F(BuiltinsRegExpTest, Exec1) JSHandle index(thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("index")); JSHandle indexHandle(JSObject::GetProperty(thread, execResult, index).GetValue()); uint32_t resultIndex = JSTaggedValue::ToUint32(thread, indexHandle); - ASSERT_TRUE(resultIndex == 4); + ASSERT_TRUE(resultIndex == 4U); JSHandle input(thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("input")); @@ -339,7 +340,7 @@ TEST_F(BuiltinsRegExpTest, Exec2) JSHandle pattern1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("((1)|(12))((3)|(23))"); JSHandle flags1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("ig"); - JSTaggedValue result1 = CreateRegExpObjByPatternAndFlags(thread, pattern1, flags1); + JSTaggedValue result1 = CreateBuiltinsRegExpObjByPatternAndFlags(thread, pattern1, flags1); JSHandle value(thread, reinterpret_cast(result1.GetRawData())); JSHandle inputString = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("123"); @@ -362,7 +363,7 @@ TEST_F(BuiltinsRegExpTest, Exec2) JSHandle index(thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("index")); JSHandle indexHandle(JSObject::GetProperty(thread, execResult, index).GetValue()); uint32_t resultIndex = JSTaggedValue::ToUint32(thread, indexHandle); - ASSERT_TRUE(resultIndex == 0); + ASSERT_TRUE(resultIndex == 0U); JSHandle input(thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("input")); JSHandle inputHandle(JSObject::GetProperty(thread, execResult, input).GetValue()); @@ -415,7 +416,7 @@ TEST_F(BuiltinsRegExpTest, Match1) JSHandle pattern1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("quick\\s(brown).+?(jumps)"); JSHandle flags1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("iug"); - JSTaggedValue result1 = CreateRegExpObjByPatternAndFlags(thread, pattern1, flags1); + JSTaggedValue result1 = CreateBuiltinsRegExpObjByPatternAndFlags(thread, pattern1, flags1); JSHandle value(thread, reinterpret_cast(result1.GetRawData())); JSHandle inputString = @@ -444,7 +445,7 @@ TEST_F(BuiltinsRegExpTest, Test1) JSHandle pattern1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("quick\\s(brown).+?(jumps)"); JSHandle flags1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("iug"); - JSTaggedValue result1 = CreateRegExpObjByPatternAndFlags(thread, pattern1, flags1); + JSTaggedValue result1 = CreateBuiltinsRegExpObjByPatternAndFlags(thread, pattern1, flags1); JSHandle value(thread, reinterpret_cast(result1.GetRawData())); JSHandle inputString = @@ -466,7 +467,7 @@ TEST_F(BuiltinsRegExpTest, Search1) JSHandle pattern1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("quick\\s(brown).+?(jumps)"); JSHandle flags1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("iug"); - JSTaggedValue result1 = CreateRegExpObjByPatternAndFlags(thread, pattern1, flags1); + JSTaggedValue result1 = CreateBuiltinsRegExpObjByPatternAndFlags(thread, pattern1, flags1); JSHandle value(thread, reinterpret_cast(result1.GetRawData())); JSHandle inputString = @@ -487,7 +488,7 @@ TEST_F(BuiltinsRegExpTest, Split1) // invoke RegExpConstructor method JSHandle pattern1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("-"); JSHandle flags1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("iug"); - JSTaggedValue result1 = CreateRegExpObjByPatternAndFlags(thread, pattern1, flags1); + JSTaggedValue result1 = CreateBuiltinsRegExpObjByPatternAndFlags(thread, pattern1, flags1); JSHandle value(thread, reinterpret_cast(result1.GetRawData())); JSHandle inputString = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString(""); @@ -515,7 +516,7 @@ TEST_F(BuiltinsRegExpTest, Split2) // invoke RegExpConstructor method JSHandle pattern1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("-"); JSHandle flags1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("iug"); - JSTaggedValue result1 = CreateRegExpObjByPatternAndFlags(thread, pattern1, flags1); + JSTaggedValue result1 = CreateBuiltinsRegExpObjByPatternAndFlags(thread, pattern1, flags1); JSHandle value(thread, reinterpret_cast(result1.GetRawData())); JSHandle inputString = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("a-b-c"); @@ -570,7 +571,7 @@ TEST_F(BuiltinsRegExpTest, Replace1) JSHandle pattern1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("quick\\s(brown).+?(jumps)"); JSHandle flags1 = thread->GetEcmaVM()->GetFactory()->NewFromCanBeCompressString("iug"); - JSTaggedValue result1 = CreateRegExpObjByPatternAndFlags(thread, pattern1, flags1); + JSTaggedValue result1 = CreateBuiltinsRegExpObjByPatternAndFlags(thread, pattern1, flags1); JSHandle value(thread, reinterpret_cast(result1.GetRawData())); JSHandle inputString = @@ -598,7 +599,7 @@ TEST_F(BuiltinsRegExpTest, Replace2) ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); JSHandle pattern1 = factory->NewFromCanBeCompressString("b(c)(z)?(.)"); JSHandle flags1 = factory->NewFromCanBeCompressString(""); - JSTaggedValue result1 = CreateRegExpObjByPatternAndFlags(thread, pattern1, flags1); + JSTaggedValue result1 = CreateBuiltinsRegExpObjByPatternAndFlags(thread, pattern1, flags1); JSHandle value(thread, reinterpret_cast(result1.GetRawData())); JSHandle inputString = factory->NewFromCanBeCompressString("abcde"); @@ -623,7 +624,7 @@ TEST_F(BuiltinsRegExpTest, Replace3) ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); JSHandle pattern1 = factory->NewFromCanBeCompressString("abc"); JSHandle flags1 = factory->NewFromCanBeCompressString("g"); - JSTaggedValue result1 = CreateRegExpObjByPatternAndFlags(thread, pattern1, flags1); + JSTaggedValue result1 = CreateBuiltinsRegExpObjByPatternAndFlags(thread, pattern1, flags1); JSHandle value(thread, reinterpret_cast(result1.GetRawData())); JSHandle inputString = factory->NewFromCanBeCompressString("abcde"); @@ -646,12 +647,14 @@ TEST_F(BuiltinsRegExpTest, RegExpParseCache) { RegExpParserCache *regExpParserCache = thread->GetEcmaVM()->GetRegExpParserCache(); ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); - JSHandle string1 = factory->NewFromCanBeCompressString("abc"); - JSHandle string2 = factory->NewFromCanBeCompressString("abcd"); - regExpParserCache->SetCache(*string1, 0, JSTaggedValue::True(), 2); - ASSERT_TRUE(regExpParserCache->GetCache(*string1, 0).first == JSTaggedValue::True()); - ASSERT_TRUE(regExpParserCache->GetCache(*string1, 0).second == 2); - ASSERT_TRUE(regExpParserCache->GetCache(*string1, RegExpParserCache::CACHE_SIZE).first == JSTaggedValue::Hole()); - ASSERT_TRUE(regExpParserCache->GetCache(*string2, 0).first == JSTaggedValue::Hole()); + JSHandle string1 = factory->NewFromString("abc"); + JSHandle string2 = factory->NewFromString("abcd"); + PandaVector vec {}; + regExpParserCache->SetCache(*string1, 0, JSTaggedValue::True(), 2, std::move(vec)); + ASSERT_TRUE(regExpParserCache->GetCache(*string1, 0, vec).first == JSTaggedValue::True()); + ASSERT_TRUE(regExpParserCache->GetCache(*string1, 0, vec).second == 2U); + ASSERT_TRUE(regExpParserCache->GetCache(*string1, RegExpParserCache::CACHE_SIZE, vec).first == + JSTaggedValue::Hole()); + ASSERT_TRUE(regExpParserCache->GetCache(*string2, 0, vec).first == JSTaggedValue::Hole()); } } // namespace panda::test diff --git a/tests/runtime/regexp/dyn_buffer_test.cpp b/tests/runtime/regexp/dyn_buffer_test.cpp index b66315c671883f21a3765969056d459b93b0041e..b288a4b1ec8cf7a08097a682f1c7afd7bd3973be 100644 --- a/tests/runtime/regexp/dyn_buffer_test.cpp +++ b/tests/runtime/regexp/dyn_buffer_test.cpp @@ -19,7 +19,7 @@ #include "plugins/ecmascript/runtime/ecma_vm.h" #include "include/runtime.h" #include "include/runtime_options.h" -#include "plugins/ecmascript/runtime/regexp/dyn_chunk.h" +#include "plugins/ecmascript/runtime/mem/dyn_chunk.h" #include "plugins/ecmascript/runtime/object_factory.h" namespace panda::test { diff --git a/tests/runtime/regexp/regexp_test.cpp b/tests/runtime/regexp/regexp_test.cpp index f54db20b8111568c29711545d6a9031d79c6aaf5..0a07c86bb599c4cc30fe605b4437485fb9eedf08 100644 --- a/tests/runtime/regexp/regexp_test.cpp +++ b/tests/runtime/regexp/regexp_test.cpp @@ -606,6 +606,26 @@ TEST_F(RegExpTest, ParseNoError2) ASSERT_FALSE(parseResult); } +TEST_F(RegExpTest, ParseNoError3) +{ + RegExpParser parser = RegExpParser(); + PandaString source("[\\⥚]"); + parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 1); + parser.Parse(); + bool parseResult = parser.IsError(); + ASSERT_FALSE(parseResult); +} + +TEST_F(RegExpTest, ParseNoError4) +{ + RegExpParser parser = RegExpParser(); + PandaString source("[\\⊲|\\⇐]"); + parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 1); + parser.Parse(); + bool parseResult = parser.IsError(); + ASSERT_FALSE(parseResult); +} + TEST_F(RegExpTest, ParseAndExec1) { ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); @@ -623,7 +643,7 @@ TEST_F(RegExpTest, ParseAndExec1) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("ab"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -645,7 +665,7 @@ TEST_F(RegExpTest, ParseAndExec2) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 10); + ASSERT_EQ(result.captures_.size(), 10U); JSHandle str = factory->NewFromCanBeCompressString("ab"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); ASSERT_TRUE(result.captures_[1].second->Compare(*str) == 0); @@ -676,7 +696,7 @@ TEST_F(RegExpTest, ParseAndExec3) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 2); + ASSERT_EQ(result.captures_.size(), 2U); JSHandle str1 = factory->NewFromCanBeCompressString("aaba"); JSHandle str2 = factory->NewFromCanBeCompressString("ba"); ASSERT_TRUE(result.captures_[0].second->Compare(*str1) == 0); @@ -700,7 +720,7 @@ TEST_F(RegExpTest, ParseAndExec4) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("aa"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -722,7 +742,7 @@ TEST_F(RegExpTest, ParseAndExec5) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString(""); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -744,7 +764,7 @@ TEST_F(RegExpTest, ParseAndExec6) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 6); + ASSERT_EQ(result.captures_.size(), 6U); JSHandle str1 = factory->NewFromCanBeCompressString("zaacbbbcac"); JSHandle str2 = factory->NewFromCanBeCompressString("z"); JSHandle str3 = factory->NewFromCanBeCompressString("ac"); @@ -775,7 +795,7 @@ TEST_F(RegExpTest, ParseAndExec7) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("abc"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -797,7 +817,7 @@ TEST_F(RegExpTest, ParseAndExec8) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("abc"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -819,7 +839,7 @@ TEST_F(RegExpTest, ParseAndExec9) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("er"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -841,7 +861,7 @@ TEST_F(RegExpTest, ParseAndExec10) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("d"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -863,7 +883,7 @@ TEST_F(RegExpTest, ParseAndExec11) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("a"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -885,7 +905,7 @@ TEST_F(RegExpTest, ParseAndExec12) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("\n"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -907,7 +927,7 @@ TEST_F(RegExpTest, ParseAndExec13) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("abc"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -929,7 +949,7 @@ TEST_F(RegExpTest, ParseAndExec14) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("abc"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -951,7 +971,7 @@ TEST_F(RegExpTest, ParseAndExec15) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("a"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -973,7 +993,7 @@ TEST_F(RegExpTest, ParseAndExec16) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("ABC"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -995,7 +1015,7 @@ TEST_F(RegExpTest, ParseAndExec17) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("a\n"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -1033,7 +1053,7 @@ TEST_F(RegExpTest, ParseAndExec19) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("a"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -1055,7 +1075,7 @@ TEST_F(RegExpTest, ParseAndExec20) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 2); + ASSERT_EQ(result.captures_.size(), 2U); JSHandle str1 = factory->NewFromCanBeCompressString(""); JSHandle str2 = factory->NewFromCanBeCompressString("aaa"); ASSERT_TRUE(result.captures_[0].second->Compare(*str1) == 0); @@ -1079,7 +1099,7 @@ TEST_F(RegExpTest, ParseAndExec21) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("a"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -1101,7 +1121,7 @@ TEST_F(RegExpTest, ParseAndExec22) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("aaaa:"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -1123,7 +1143,7 @@ TEST_F(RegExpTest, ParseAndExec23) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("a"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -1161,7 +1181,7 @@ TEST_F(RegExpTest, ParseAndExec25) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 2); + ASSERT_EQ(result.captures_.size(), 2U); JSHandle str1 = factory->NewFromCanBeCompressString(""); JSHandle str2 = factory->NewFromCanBeCompressString("ab"); ASSERT_TRUE(result.captures_[0].second->Compare(*str1) == 0); @@ -1185,7 +1205,7 @@ TEST_F(RegExpTest, ParseAndExec26) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("A"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -1207,7 +1227,7 @@ TEST_F(RegExpTest, ParseAndExec27) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("Z"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -1229,7 +1249,7 @@ TEST_F(RegExpTest, ParseAndExec28) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("\n"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -1251,7 +1271,7 @@ TEST_F(RegExpTest, ParseAndExec29) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 2); + ASSERT_EQ(result.captures_.size(), 2U); JSHandle str = factory->NewFromCanBeCompressString(""); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); ASSERT_TRUE(result.captures_[1].second->Compare(*str) == 0); @@ -1274,7 +1294,7 @@ TEST_F(RegExpTest, ParseAndExec30) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 2); + ASSERT_EQ(result.captures_.size(), 2U); JSHandle str = factory->NewFromCanBeCompressString(""); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); ASSERT_TRUE(result.captures_[1].first); @@ -1295,7 +1315,7 @@ TEST_F(RegExpTest, ParseAndExec31) executor.Execute(reinterpret_cast(input.c_str()), 0, input.length(), parser.GetOriginBuffer()); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 2); + ASSERT_EQ(result.captures_.size(), 2U); JSHandle str1 = factory->NewFromCanBeCompressString("abb"); JSHandle str2 = factory->NewFromCanBeCompressString("b"); ASSERT_TRUE(result.captures_[0].second->Compare(*str1) == 0); @@ -1317,7 +1337,7 @@ TEST_F(RegExpTest, ParseAndExec32) executor.Execute(reinterpret_cast(input.c_str()), 0, input.length(), parser.GetOriginBuffer()); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 3); + ASSERT_EQ(result.captures_.size(), 3U); JSHandle str1 = factory->NewFromCanBeCompressString("abb"); JSHandle str2 = factory->NewFromCanBeCompressString("ab"); JSHandle str3 = factory->NewFromCanBeCompressString("b"); @@ -1341,7 +1361,7 @@ TEST_F(RegExpTest, ParseAndExec33) executor.Execute(reinterpret_cast(input.c_str()), 0, input.length(), parser.GetOriginBuffer()); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("qya"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -1361,7 +1381,7 @@ TEST_F(RegExpTest, ParseAndExec34) executor.Execute(reinterpret_cast(input.c_str()), 0, input.length(), parser.GetOriginBuffer()); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("qy"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -1381,7 +1401,7 @@ TEST_F(RegExpTest, ParseAndExec35) executor.Execute(reinterpret_cast(input.c_str()), 0, input.length(), parser.GetOriginBuffer()); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 4); + ASSERT_EQ(result.captures_.size(), 4U); JSHandle str1 = factory->NewFromCanBeCompressString("2021-01-09"); JSHandle str2 = factory->NewFromCanBeCompressString("2021"); JSHandle str3 = factory->NewFromCanBeCompressString("01"); @@ -1407,7 +1427,7 @@ TEST_F(RegExpTest, ParseAndExec36) executor.Execute(reinterpret_cast(input.c_str()), 0, input.length(), parser.GetOriginBuffer()); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 3); + ASSERT_EQ(result.captures_.size(), 3U); JSHandle str1 = factory->NewFromCanBeCompressString("Quick Brown Fox Jumps"); JSHandle str2 = factory->NewFromCanBeCompressString("Brown"); JSHandle str3 = factory->NewFromCanBeCompressString("Jumps"); @@ -1431,7 +1451,7 @@ TEST_F(RegExpTest, ParseAndExec37) executor.Execute(reinterpret_cast(input.c_str()), 0, input.length(), parser.GetOriginBuffer()); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 2); + ASSERT_EQ(result.captures_.size(), 2U); JSHandle str1 = factory->NewFromCanBeCompressString("abABc"); JSHandle str2 = factory->NewFromCanBeCompressString("AB"); ASSERT_TRUE(result.captures_[0].second->Compare(*str1) == 0); @@ -1453,7 +1473,7 @@ TEST_F(RegExpTest, ParseAndExec38) executor.Execute(reinterpret_cast(input.c_str()), 0, input.length(), parser.GetOriginBuffer()); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 3); + ASSERT_EQ(result.captures_.size(), 3U); JSHandle str1 = factory->NewFromCanBeCompressString("www.netscape.com"); JSHandle str2 = factory->NewFromCanBeCompressString("netscape."); JSHandle str3 = factory->NewFromCanBeCompressString("netscap"); @@ -1477,7 +1497,7 @@ TEST_F(RegExpTest, ParseAndExec39) executor.Execute(reinterpret_cast(input.c_str()), 0, input.length(), parser.GetOriginBuffer()); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 2); + ASSERT_EQ(result.captures_.size(), 2U); JSHandle str1 = factory->NewFromCanBeCompressString("b"); JSHandle str2 = factory->NewFromCanBeCompressString(""); ASSERT_TRUE(result.captures_[0].second->Compare(*str1) == 0); @@ -1499,7 +1519,7 @@ TEST_F(RegExpTest, ParseAndExec40) executor.Execute(reinterpret_cast(input.c_str()), 0, input.length(), parser.GetOriginBuffer()); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString(""); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -1519,7 +1539,7 @@ TEST_F(RegExpTest, ParseAndExec41) executor.Execute(reinterpret_cast(input.c_str()), 0, input.length(), parser.GetOriginBuffer()); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 4); + ASSERT_EQ(result.captures_.size(), 4U); JSHandle str1 = factory->NewFromCanBeCompressString("baaabaac"); JSHandle str2 = factory->NewFromCanBeCompressString("ba"); JSHandle str3 = factory->NewFromCanBeCompressString("abaac"); @@ -1544,7 +1564,7 @@ TEST_F(RegExpTest, ParseAndExec42) executor.Execute(reinterpret_cast(input.c_str()), 0, input.length(), parser.GetOriginBuffer()); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("abc324234"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -1564,7 +1584,7 @@ TEST_F(RegExpTest, ParseAndExec43) executor.Execute(reinterpret_cast(input.c_str()), 0, input.length(), parser.GetOriginBuffer()); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("1\nl"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -1584,7 +1604,7 @@ TEST_F(RegExpTest, ParseAndExec44) executor.Execute(reinterpret_cast(input.c_str()), 0, input.length(), parser.GetOriginBuffer()); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("c\bd"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -1604,7 +1624,7 @@ TEST_F(RegExpTest, ParseAndExec45) executor.Execute(reinterpret_cast(input.c_str()), 0, input.length(), parser.GetOriginBuffer()); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("easy"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -1624,7 +1644,7 @@ TEST_F(RegExpTest, ParseAndExec46) executor.Execute(reinterpret_cast(input.c_str()), 0, input.length(), parser.GetOriginBuffer()); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 3); + ASSERT_EQ(result.captures_.size(), 3U); JSHandle str1 = factory->NewFromCanBeCompressString("Course_Creator = Test"); JSHandle str2 = factory->NewFromCanBeCompressString("Course_Creator"); ASSERT_TRUE(result.captures_[0].second->Compare(*str1) == 0); @@ -1648,7 +1668,7 @@ TEST_F(RegExpTest, ParseAndExec47) executor.Execute(reinterpret_cast(input.c_str()), 0, input.length(), parser.GetOriginBuffer()); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("et"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -1668,7 +1688,7 @@ TEST_F(RegExpTest, ParseAndExec49) executor.Execute(reinterpret_cast(input.c_str()), 0, input.length(), parser.GetOriginBuffer()); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 5); + ASSERT_EQ(result.captures_.size(), 5U); JSHandle str1 = factory->NewFromCanBeCompressString("ab55"); JSHandle str2 = factory->NewFromCanBeCompressString("ab55"); JSHandle str3 = factory->NewFromCanBeCompressString("b"); @@ -1680,7 +1700,7 @@ TEST_F(RegExpTest, ParseAndExec49) ASSERT_TRUE(result.captures_[3].second->Compare(*str4) == 0); ASSERT_TRUE(result.captures_[4].second->Compare(*str5) == 0); } - +/* TEST_F(RegExpTest, ParseAndExec50) { ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); @@ -1696,7 +1716,7 @@ TEST_F(RegExpTest, ParseAndExec50) executor.Execute(reinterpret_cast(input.c_str()), 0, input.length(), parser.GetOriginBuffer()); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 4); + ASSERT_EQ(result.captures_.size(), 4U); JSHandle str1 = factory->NewFromCanBeCompressString("2020-12-31"); JSHandle str2 = factory->NewFromCanBeCompressString("2020"); JSHandle str3 = factory->NewFromCanBeCompressString("12-31"); @@ -1706,7 +1726,7 @@ TEST_F(RegExpTest, ParseAndExec50) ASSERT_TRUE(result.captures_[2].second->Compare(*str3) == 0); ASSERT_TRUE(result.captures_[3].second->Compare(*str4) == 0); } - +*/ TEST_F(RegExpTest, ParseAndExec51) { RegExpParser parser = RegExpParser(); @@ -1721,7 +1741,7 @@ TEST_F(RegExpTest, ParseAndExec51) parser.GetOriginBuffer(), true); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); } TEST_F(RegExpTest, ParseAndExec52) @@ -1739,7 +1759,7 @@ TEST_F(RegExpTest, ParseAndExec52) executor.Execute(reinterpret_cast(input.c_str()), 0, input.length(), parser.GetOriginBuffer()); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 2); + ASSERT_EQ(result.captures_.size(), 2U); JSHandle str1 = factory->NewFromCanBeCompressString("aabcdaa"); JSHandle str2 = factory->NewFromCanBeCompressString("aa"); ASSERT_TRUE(result.captures_[0].second->Compare(*str1) == 0); @@ -1761,7 +1781,7 @@ TEST_F(RegExpTest, ParseAndExec53) parser.GetOriginBuffer(), true); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("\u0001"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -1796,7 +1816,7 @@ TEST_F(RegExpTest, ParseAndExec55) parser.GetOriginBuffer(), false); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("e"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -1816,7 +1836,7 @@ TEST_F(RegExpTest, ParseAndExec56) parser.GetOriginBuffer(), true); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromString("a啊"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -1849,9 +1869,9 @@ TEST_F(RegExpTest, ParseAndExec58) bool ret = executor.Execute(reinterpret_cast(data), 0, 2, parser.GetOriginBuffer(), true); ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); char16_t data1[] = {0xdf06}; - JSHandle str = factory->NewFromUtf16UnCheck(reinterpret_cast(data1), 1, true); + JSHandle str = factory->NewFromUtf16(reinterpret_cast(data1), 1); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -1872,7 +1892,7 @@ TEST_F(RegExpTest, ParseAndExec59) ASSERT_TRUE(ret); MatchResult result = executor.GetResult(thread, ret); - ASSERT_EQ(result.captures_.size(), 1); + ASSERT_EQ(result.captures_.size(), 1U); JSHandle str = factory->NewFromCanBeCompressString("\u000B"); ASSERT_TRUE(result.captures_[0].second->Compare(*str) == 0); } @@ -2022,5 +2042,4 @@ TEST_F(RegExpTest, RangeSet10) rangeResult.Invert(false); EXPECT_EQ(rangeResult, rangeExpected); } - } // namespace panda::test