diff --git a/CMakeLists.txt b/CMakeLists.txt index 767c38e2e83fcdaa418f51007504b8b665cedce9..e9c6b6c8aaeedcc28971efec23fa37d812054df7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,12 +21,6 @@ add_definitions(-DARK_INTRINSIC_SET) target_compile_definitions(arkcompiler PUBLIC -DENABLE_BYTECODE_OPT -DARK_INTRINSIC_SET) target_compile_definitions(arkbytecodeopt PUBLIC -DENABLE_BYTECODE_OPT -DARK_INTRINSIC_SET) -set(ICU_ROOT ${PANDA_THIRD_PARTY_SOURCES_DIR}/icu) -set(ORIG_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-implicit-const-int-float-conversion -Wno-unknown-warning-option") -add_subdirectory(${PANDA_THIRD_PARTY_CONFIG_DIR}/icu "${CMAKE_CURRENT_BINARY_DIR}/third_party/icu") -set(CMAKE_CXX_FLAGS "${ORIG_CMAKE_CXX_FLAGS}") - if(PANDA_WITH_TOOLCHAIN) add_subdirectory(assembler) add_subdirectory(isa) diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index eded94bc24557fae871a2fd13d7cea6e2696f4e0..418ce3ba5577ccadefaa3f8423d3c4e1b791c550 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -13,8 +13,6 @@ panda_promote_to_definitions(PANDA_ECMASCRIPT_ENABLE_RUNTIME_STAT) -option(PANDA_LINK_ICU "Enable linking with icu third party library" true) - add_subdirectory(builtins) set(ECMA_SRC_DIR ${PANDA_ECMASCRIPT_PLUGIN_SOURCE}/runtime) @@ -161,7 +159,6 @@ set(ECMASCRIPT_SOURCES ${ECMA_SRC_DIR}/linked_hash_table.cpp ${ECMA_SRC_DIR}/literal_data_extractor.cpp ${ECMA_SRC_DIR}/message_string.cpp - ${ECMA_SRC_DIR}/mem/dyn_chunk.cpp ${ECMA_SRC_DIR}/mem/ecma_reference_processor.cpp ${ECMA_SRC_DIR}/mem/ecma_string.cpp ${ECMA_SRC_DIR}/mem/mem_manager.cpp @@ -171,8 +168,6 @@ set(ECMASCRIPT_SOURCES ${ECMA_SRC_DIR}/object_operator.cpp ${ECMA_SRC_DIR}/layout_info.cpp ${ECMA_SRC_DIR}/regexp/regexp_executor.cpp - ${ECMA_SRC_DIR}/regexp/regexp_opcode.cpp - ${ECMA_SRC_DIR}/regexp/regexp_parser.cpp ${ECMA_SRC_DIR}/regexp/regexp_parser_cache.cpp ${ECMA_SRC_DIR}/tagged_dictionary.cpp ${ECMA_SRC_DIR}/template_string.cpp 
diff --git a/runtime/builtins/builtins_regexp.cpp b/runtime/builtins/builtins_regexp.cpp index ea3237ead726955ae9c6654c5498e0bbdf16b087..e55ea3a5f5c7ecdd4cc4ada10b349006213de3f7 100644 --- a/runtime/builtins/builtins_regexp.cpp +++ b/runtime/builtins/builtins_regexp.cpp @@ -36,8 +36,8 @@ namespace panda::ecmascript::builtins { constexpr uint32_t MIN_REPLACE_STRING_LENGTH = 1000; constexpr uint32_t MAX_SPLIT_LIMIT = 0xFFFFFFFFU; -static RegExpExecutor::MatchResult Matcher(JSThread *thread, const JSHandle &regexp, - const uint8_t *buffer, size_t length, int32_t last_index, bool is_utf16); +static MatchResult Matcher(JSThread *thread, const JSHandle &regexp, const uint8_t *buffer, + size_t length, int32_t last_index, bool is_utf16); static bool GetFlagsInternal(JSThread *thread, const JSHandle &obj, uint8_t mask); // 21.2.5.2.2 Runtime Semantics: RegExpBuiltinExec ( R, S ) @@ -719,8 +719,7 @@ JSTaggedValue RegExpReplaceFast(JSThread *thread, JSHandle &regex str_buffer = u8_buffer.data(); } - RegExpExecutor::MatchResult match_result = - Matcher(thread, regexp, str_buffer, input_length, last_index, is_utf16); + MatchResult match_result = Matcher(thread, regexp, str_buffer, input_length, last_index, is_utf16); if (!match_result.is_success) { if ((flags & (RegExpParser::FLAG_STICKY | RegExpParser::FLAG_GLOBAL)) != 0) { last_index = 0; @@ -1357,20 +1356,20 @@ JSTaggedValue reg_exp::proto::Split(EcmaRuntimeCallInfo *argv) } // NOLINTNEXTLINE(readability-non-const-parameter) -RegExpExecutor::MatchResult Matcher(JSThread *thread, const JSHandle &regexp, const uint8_t *buffer, - size_t length, int32_t last_index, bool is_utf16) +MatchResult Matcher(JSThread *thread, const JSHandle &regexp, const uint8_t *buffer, size_t length, + int32_t last_index, bool is_utf16) { // get bytecode JSTaggedValue buffer_data = JSRegExp::Cast(regexp->GetTaggedObject())->GetByteCodeBuffer(); void *dyn_buf = JSNativePointer::Cast(buffer_data.GetTaggedObject())->GetExternalPointer(); auto bytecode_buffer = 
reinterpret_cast(dyn_buf); // execute - RegExpExecutor executor {}; + RegExpExecutor executor = RegExpExecutor(); if (last_index < 0) { last_index = 0; } bool ret = executor.Execute(buffer, last_index, static_cast(length), bytecode_buffer, is_utf16); - RegExpExecutor::MatchResult result = executor.GetResult(thread, ret); + MatchResult result = executor.GetResult(thread, ret); return result; } @@ -1486,7 +1485,7 @@ JSTaggedValue RegExpBuiltinExec(JSThread *thread, const JSHandle input_string->CopyDataUtf8(u8_buffer.data(), string_length + 1); str_buffer = u8_buffer.data(); } - RegExpExecutor::MatchResult match_result = Matcher(thread, regexp, str_buffer, string_length, last_index, is_utf16); + MatchResult match_result = Matcher(thread, regexp, str_buffer, string_length, last_index, is_utf16); if (!match_result.is_success) { if (global || sticky) { JSHandle last_index_value(thread, JSTaggedValue(0)); diff --git a/runtime/builtins/builtins_regexp.h b/runtime/builtins/builtins_regexp.h index 1db068d027dba0611539da86c88d69fbd12acfa1..53555a8638e9108cd030d3dba0d3dac578a999e2 100644 --- a/runtime/builtins/builtins_regexp.h +++ b/runtime/builtins/builtins_regexp.h @@ -20,9 +20,11 @@ #include "plugins/ecmascript/runtime/ecma_runtime_call_info.h" #include "plugins/ecmascript/runtime/js_tagged_value.h" #include "plugins/ecmascript/runtime/regexp/regexp_executor.h" -#include "plugins/ecmascript/runtime/regexp/regexp_parser.h" +#include "runtime/regexp/ecmascript/regexp_parser.h" namespace panda::ecmascript::builtins { +using MatchResult = RegExpMatchResult>; +using RegExpExecutor = panda::ecmascript::RegExpExecutor; class RegExpExecResultCache : public TaggedArray { public: diff --git a/runtime/js_serializer.h b/runtime/js_serializer.h index 2cdddcfcef4e237493f9b0827af7174ed475705a..975dd9b70790c7b2403fda10da67f4131dccbb45 100644 --- a/runtime/js_serializer.h +++ b/runtime/js_serializer.h @@ -25,7 +25,7 @@ #include "plugins/ecmascript/runtime/js_object.h" #include 
"plugins/ecmascript/runtime/js_thread.h" #include "plugins/ecmascript/runtime/js_typed_array.h" -#include "plugins/ecmascript/runtime/mem/dyn_chunk.h" +#include "runtime/regexp/ecmascript/mem/dyn_chunk.h" namespace panda::ecmascript { enum class SerializationUID : uint8_t { diff --git a/runtime/mem/dyn_chunk.cpp b/runtime/mem/dyn_chunk.cpp deleted file mode 100644 index 8a9573ab9751fc89006e261c22bb91224026a244..0000000000000000000000000000000000000000 --- a/runtime/mem/dyn_chunk.cpp +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2021-2022 Huawei Device Co., Ltd. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "plugins/ecmascript/runtime/mem/dyn_chunk.h" -#include "runtime/include/runtime.h" -#include "securec.h" - -namespace panda::ecmascript { -int DynChunk::Expand(size_t new_size) -{ - if (new_size > allocated_size_) { - if (error_) { - return FAILURE; - } - ASSERT(allocated_size_ <= std::numeric_limits::max() / ALLOCATE_MULTIPLIER); - size_t size = allocated_size_ * ALLOCATE_MULTIPLIER; - if (size > new_size) { - new_size = size; - } - new_size = std::max(new_size, ALLOCATE_MIN_SIZE); - // NOLINTNEXTLINE(modernize-avoid-c-arrays) - auto *new_buf = Runtime::GetCurrent()->GetInternalAllocator()->New(new_size); - if (new_buf == nullptr) { - error_ = true; - return FAILURE; - } - if (memset_s(new_buf, new_size, 0, new_size) != EOK) { - error_ = true; - return FAILURE; - } - if (buf_ != nullptr) { - if (memcpy_s(new_buf, size_, buf_, size_) != EOK) { - error_ = true; - return FAILURE; - } - } - Runtime::GetCurrent()->GetInternalAllocator()->DeleteArray(buf_); - buf_ = new_buf; - allocated_size_ = new_size; - } - return SUCCESS; -} - -int DynChunk::Insert(uint32_t position, size_t len) -{ - if (size_ < position) { - return FAILURE; - } - if (Expand(size_ + len) != 0) { - return FAILURE; - } - size_t move_size = size_ - position; - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - if (memmove_s(buf_ + position + len, move_size, buf_ + position, move_size) != EOK) { - return FAILURE; - } - size_ += len; - return SUCCESS; -} - -int DynChunk::Emit(const uint8_t *data, size_t length) -{ - if (UNLIKELY((size_ + length) > allocated_size_)) { - if (Expand(size_ + length) != 0) { - return FAILURE; - } - } - - if (memcpy_s(buf_ + size_, // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - length, data, length) != EOK) { - return FAILURE; - } - size_ += length; - return SUCCESS; -} - -int DynChunk::EmitChar(uint8_t c) -{ - return Emit(&c, 1); -} - -int DynChunk::EmitSelf(size_t offset, size_t length) -{ - if (UNLIKELY((size_ + length) > 
allocated_size_)) { - if (Expand(size_ + length) != 0) { - return FAILURE; - } - } - - if (memcpy_s(buf_ + size_, // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - length, - buf_ + offset, // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - length) != EOK) { - return FAILURE; - } - size_ += length; - return SUCCESS; -} - -int DynChunk::EmitStr(const char *str) -{ - return Emit(reinterpret_cast(str), strlen(str) + 1); -} -} // namespace panda::ecmascript diff --git a/runtime/mem/dyn_chunk.h b/runtime/mem/dyn_chunk.h deleted file mode 100644 index ba46f9ebefdcbd11707781abf8b0052781abd2d7..0000000000000000000000000000000000000000 --- a/runtime/mem/dyn_chunk.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2021-2022 Huawei Device Co., Ltd. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ECMASCRIPT_REGEXP_DYN_BUFFER_H -#define ECMASCRIPT_REGEXP_DYN_BUFFER_H - -#include -#include "plugins/ecmascript/runtime/ecma_macros.h" -#include "plugins/ecmascript/runtime/ecma_vm.h" -#include "plugins/ecmascript/runtime/js_thread.h" - -namespace panda::ecmascript { -class DynChunk { -public: - static constexpr size_t ALLOCATE_MIN_SIZE = 64; - static constexpr int FAILURE = -1; - static constexpr int SUCCESS = 0; - - explicit DynChunk() = default; - - ~DynChunk() - { - if (!is_internal_buffer_) { - Runtime::GetCurrent()->GetInternalAllocator()->DeleteArray(buf_); - } - } - - NO_COPY_SEMANTIC(DynChunk); - NO_MOVE_SEMANTIC(DynChunk); - - int Expand(size_t new_size); - - int Insert(uint32_t position, size_t len); - - int Emit(const uint8_t *data, size_t len); - - int EmitSelf(size_t offset, size_t len); - - int EmitChar(uint8_t c); - - int EmitStr(const char *str); - - inline int EmitU16(uint16_t data) - { - return Emit(reinterpret_cast(&data), U16_SIZE); - } - - inline int EmitU32(uint32_t data) - { - return Emit(reinterpret_cast(&data), U32_SIZE); - } - - inline int EmitU64(uint64_t data) - { - return Emit(reinterpret_cast(&data), U64_SIZE); - } - - inline void SetError() - { - error_ = true; - } - - inline size_t GetSize() const - { - return size_; - } - - inline size_t GetAllocatedSize() const - { - return allocated_size_; - } - - inline bool GetError() const - { - return error_; - } - - inline uint32_t GetU32(size_t offset) const - { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - return UnalignedLoad(reinterpret_cast(buf_ + offset)); - } - - inline void PutU32(size_t offset, uint32_t data) const - { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - return UnalignedStore(reinterpret_cast(buf_ + offset), data); - } - - inline uint32_t GetU16(size_t offset) const - { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - return UnalignedLoad(reinterpret_cast(buf_ + offset)); - } - - 
inline void PutU16(size_t offset, uint16_t data) const - { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - return UnalignedStore(reinterpret_cast(buf_ + offset), data); - } - - inline uint32_t GetU8(size_t offset) const - { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - return *(buf_ + offset); - } - - inline void PutU8(size_t offset, uint8_t data) const - { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - *(buf_ + offset) = data; - } - - ALWAYS_INLINE static inline constexpr uint32_t GetBufferOffset() - { - return MEMBER_OFFSET(DynChunk, buf_); - } - -private: - static constexpr size_t ALLOCATE_MULTIPLIER = 2; - static constexpr size_t U16_SIZE = 2; - static constexpr size_t U32_SIZE = 4; - static constexpr size_t U64_SIZE = 8; - friend class RegExpParser; - friend class RegExpOpCode; - friend class RegExpExecutor; - - explicit DynChunk(uint8_t *buf) : buf_(buf), is_internal_buffer_(true) {}; - - uint8_t *buf_ {nullptr}; - bool is_internal_buffer_ {false}; - size_t size_ {0}; - size_t allocated_size_ {0}; - bool error_ {false}; -}; -} // namespace panda::ecmascript -#endif // ECMASCRIPT_REGEXP_DYN_BUFFER_H diff --git a/runtime/regexp/regexp_executor.cpp b/runtime/regexp/regexp_executor.cpp index d3fae4a7d780a04102e2bd691c77bed07cee1d24..5e1d4f28e00e53fd2c28c175723aa59502b60529 100644 --- a/runtime/regexp/regexp_executor.cpp +++ b/runtime/regexp/regexp_executor.cpp @@ -15,260 +15,25 @@ #include "plugins/ecmascript/runtime/regexp/regexp_executor.h" -#include "plugins/ecmascript/runtime/base/string_helper.h" -#include "plugins/ecmascript/runtime/mem/dyn_chunk.h" -#include "plugins/ecmascript/runtime/regexp/regexp_opcode.h" +#include "runtime/regexp/ecmascript/mem/dyn_chunk.h" +#include "runtime/regexp/ecmascript/regexp_opcode.h" #include "securec.h" namespace panda::ecmascript { -using RegExpState = RegExpExecutor::RegExpState; -using MatchResult = RegExpExecutor::MatchResult; -bool 
RegExpExecutor::Execute(const uint8_t *input, uint32_t last_index, uint32_t length, uint8_t *buf, - bool is_wide_char) -{ - DynChunk buffer(buf); - input_ = const_cast(input); - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - input_end_ = const_cast(input + length * (is_wide_char ? WIDE_CHAR_SIZE : CHAR_SIZE)); - uint32_t size = buffer.GetU32(0); - n_capture_ = buffer.GetU32(RegExpParser::NUM_CAPTURE__OFFSET); - n_stack_ = buffer.GetU32(RegExpParser::NUM_STACK_OFFSET); - flags_ = buffer.GetU32(RegExpParser::FLAGS_OFFSET); - is_wide_char_ = is_wide_char; - - uint32_t capture_result_size = sizeof(CaptureState) * n_capture_; - uint32_t stack_size = sizeof(uintptr_t) * n_stack_; - state_size_ = sizeof(RegExpState) + capture_result_size + stack_size; - state_stack_len_ = 0; - - auto allocator = Runtime::GetCurrent()->GetInternalAllocator(); - - if (capture_result_size != 0) { - allocator->DeleteArray(capture_result_list_); - // NOLINTNEXTLINE(modernize-avoid-c-arrays) - capture_result_list_ = allocator->New(n_capture_); - if (memset_s(capture_result_list_, capture_result_size, 0, capture_result_size) != EOK) { - LOG_ECMA(FATAL) << "memset_s failed"; - UNREACHABLE(); - } - } - if (stack_size != 0) { - allocator->DeleteArray(stack_); - // NOLINTNEXTLINE(modernize-avoid-c-arrays) - stack_ = allocator->New(n_stack_); - if (memset_s(stack_, stack_size, 0, stack_size) != EOK) { - LOG_ECMA(FATAL) << "memset_s failed"; - UNREACHABLE(); - } - } - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - SetCurrentPtr(input + last_index * (is_wide_char ? 
WIDE_CHAR_SIZE : CHAR_SIZE)); - SetCurrentPC(RegExpParser::OP_START_OFFSET); - - // first split - if ((flags_ & RegExpParser::FLAG_STICKY) == 0) { - PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET); - } - return ExecuteInternal(buffer, size); -} - -bool RegExpExecutor::MatchFailed(bool is_matched) -{ - while (true) { - if (state_stack_len_ == 0) { - return true; - } - RegExpState *state = PeekRegExpState(); - if (state->type == StateType::STATE_SPLIT) { - if (!is_matched) { - PopRegExpState(); - return false; - } - } else { - is_matched = (state->type == StateType::STATE_MATCH_AHEAD && is_matched) || - (state->type == StateType::STATE_NEGATIVE_MATCH_AHEAD && !is_matched); - if (is_matched) { - if (state->type == StateType::STATE_MATCH_AHEAD) { - PopRegExpState(false); - return false; - } - if (state->type == StateType::STATE_NEGATIVE_MATCH_AHEAD) { - PopRegExpState(); - return false; - } - } - } - DropRegExpState(); - } - - return true; -} - -// NOLINTNEXTLINE(readability-function-size) -bool RegExpExecutor::ExecuteInternal(const DynChunk &byte_code, uint32_t pc_end) -{ - while (GetCurrentPC() < pc_end) { - // first split - if (!HandleFirstSplit()) { - return false; - } - uint8_t op_code = byte_code.GetU8(GetCurrentPC()); - switch (op_code) { - case RegExpOpCode::OP_DOTS: - case RegExpOpCode::OP_ALL: { - if (!HandleOpAll(op_code)) { - return false; - } - break; - } - case RegExpOpCode::OP_CHAR32: - case RegExpOpCode::OP_CHAR: { - if (!HandleOpChar(byte_code, op_code)) { - return false; - } - break; - } - case RegExpOpCode::OP_NOT_WORD_BOUNDARY: - case RegExpOpCode::OP_WORD_BOUNDARY: { - if (!HandleOpWordBoundary(op_code)) { - return false; - } - break; - } - case RegExpOpCode::OP_LINE_START: { - if (!HandleOpLineStart(op_code)) { - return false; - } - break; - } - case RegExpOpCode::OP_LINE_END: { - if (!HandleOpLineEnd(op_code)) { - return false; - } - break; - } - case RegExpOpCode::OP_SAVE_START: - HandleOpSaveStart(byte_code, op_code); - break; - 
case RegExpOpCode::OP_SAVE_END: - HandleOpSaveEnd(byte_code, op_code); - break; - case RegExpOpCode::OP_GOTO: { - uint32_t offset = byte_code.GetU32(GetCurrentPC() + 1); - Advance(op_code, offset); - break; - } - case RegExpOpCode::OP_MATCH: { - // jump to match ahead - if (MatchFailed(true)) { - return false; - } - break; - } - case RegExpOpCode::OP_MATCH_END: - return true; - case RegExpOpCode::OP_SAVE_RESET: - HandleOpSaveReset(byte_code, op_code); - break; - case RegExpOpCode::OP_SPLIT_NEXT: - case RegExpOpCode::OP_MATCH_AHEAD: - case RegExpOpCode::OP_NEGATIVE_MATCH_AHEAD: - HandleOpMatch(byte_code, op_code); - break; - case RegExpOpCode::OP_SPLIT_FIRST: - HandleOpSplitFirst(byte_code, op_code); - break; - case RegExpOpCode::OP_PREV: { - if (!HandleOpPrev(op_code)) { - return false; - } - break; - } - case RegExpOpCode::OP_LOOP_GREEDY: - case RegExpOpCode::OP_LOOP: - HandleOpLoop(byte_code, op_code); - break; - case RegExpOpCode::OP_PUSH_CHAR: { - PushStack(reinterpret_cast(GetCurrentPtr())); - Advance(op_code); - break; - } - case RegExpOpCode::OP_CHECK_CHAR: { - if (PopStack() != reinterpret_cast(GetCurrentPtr())) { - Advance(op_code); - } else { - uint32_t offset = byte_code.GetU32(GetCurrentPC() + 1); - Advance(op_code, offset); - } - break; - } - case RegExpOpCode::OP_PUSH: { - PushStack(0); - Advance(op_code); - break; - } - case RegExpOpCode::OP_POP: { - PopStack(); - Advance(op_code); - break; - } - case RegExpOpCode::OP_RANGE32: { - if (!HandleOpRange32(byte_code)) { - return false; - } - break; - } - case RegExpOpCode::OP_RANGE: { - if (!HandleOpRange(byte_code)) { - return false; - } - break; - } - case RegExpOpCode::OP_BACKREFERENCE: - case RegExpOpCode::OP_BACKWARD_BACKREFERENCE: { - if (!HandleOpBackReference(byte_code, op_code)) { - return false; - } - break; - } - default: - UNREACHABLE(); - } - } - // for loop match - return true; -} - -void RegExpExecutor::DumpResult(std::ostream &out) const -{ - out << "captures:" << std::endl; - for 
(uint32_t i = 0; i < n_capture_; i++) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - CaptureState *capture_state = &capture_result_list_[i]; - int32_t len = capture_state->capture_end - capture_state->capture_start; - if ((capture_state->capture_start != nullptr && capture_state->capture_end != nullptr) && (len >= 0)) { - out << i << ":\t" << PandaString(reinterpret_cast(capture_state->capture_start), len) - << std::endl; - } else { - out << i << ":\t" - << "undefined" << std::endl; - } - } -} -MatchResult RegExpExecutor::GetResult(const JSThread *thread, bool is_success) const +RegExpMatchResult> RegExpExecutor::GetResult(const JSThread *thread, bool is_success) const { ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); - MatchResult result; + RegExpMatchResult> result; std::vector>> captures; result.is_success = is_success; if (is_success) { - for (uint32_t i = 0; i < n_capture_; i++) { + for (uint32_t i = 0; i < GetCaptureCount(); i++) { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - CaptureState *capture_state = &capture_result_list_[i]; + CaptureState *capture_state = &GetCaptureResultList()[i]; if (i == 0) { - result.index = capture_state->capture_start - input_; - if (is_wide_char_) { + result.index = capture_state->capture_start - GetInputPtr(); + if (IsWideChar()) { result.index /= WIDE_CHAR_SIZE; } } @@ -276,7 +41,7 @@ MatchResult RegExpExecutor::GetResult(const JSThread *thread, bool is_success) c std::pair> pair; if ((capture_state->capture_start != nullptr && capture_state->capture_end != nullptr) && (len >= 0)) { pair.first = false; - if (is_wide_char_) { + if (IsWideChar()) { // create utf-16 string pair.second = factory->NewFromUtf16( reinterpret_cast(capture_state->capture_start), len / 2); @@ -299,90 +64,12 @@ MatchResult RegExpExecutor::GetResult(const JSThread *thread, bool is_success) c captures.emplace_back(pair); } result.captures = captures; - result.end_index = current_ptr_ - input_; 
- if (is_wide_char_) { + result.end_index = GetCurrentPtr() - GetInputPtr(); + if (IsWideChar()) { result.end_index /= WIDE_CHAR_SIZE; } } return result; } -void RegExpExecutor::PushRegExpState(StateType type, uint32_t pc) -{ - ReAllocStack(state_stack_len_ + 1); - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - auto state = reinterpret_cast(state_stack_ + state_stack_len_ * state_size_); - state->type = type; - state->current_pc = pc; - state->current_stack = current_stack_; - state->current_ptr = GetCurrentPtr(); - size_t list_size = sizeof(CaptureState) * n_capture_; - if (memcpy_s(state->capture_result_list, list_size, GetCaptureResultList(), list_size) != EOK) { - LOG_ECMA(FATAL) << "memcpy_s failed"; - UNREACHABLE(); - } - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - uint8_t *stack_start = reinterpret_cast(state->capture_result_list) + sizeof(CaptureState) * n_capture_; - if (stack_ != nullptr) { - size_t stack_size = sizeof(uintptr_t) * n_stack_; - if (memcpy_s(stack_start, stack_size, stack_, stack_size) != EOK) { - LOG_ECMA(FATAL) << "memcpy_s failed"; - UNREACHABLE(); - } - } - state_stack_len_++; -} - -RegExpState *RegExpExecutor::PopRegExpState(bool copy_captrue) -{ - if (state_stack_len_ != 0) { - auto state = PeekRegExpState(); - size_t list_size = sizeof(CaptureState) * n_capture_; - if (copy_captrue) { - if (memcpy_s(GetCaptureResultList(), list_size, state->capture_result_list, list_size) != EOK) { - LOG_ECMA(FATAL) << "memcpy_s failed"; - UNREACHABLE(); - } - } - SetCurrentPtr(state->current_ptr); - SetCurrentPC(state->current_pc); - current_stack_ = state->current_stack; - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - uint8_t *stack_start = reinterpret_cast(state->capture_result_list) + list_size; - if (stack_ != nullptr) { - size_t stack_size = sizeof(uintptr_t) * n_stack_; - if (memcpy_s(stack_, stack_size, stack_start, stack_size) != EOK) { - LOG_ECMA(FATAL) << "memcpy_s 
failed"; - UNREACHABLE(); - } - } - state_stack_len_--; - return state; - } - return nullptr; -} - -void RegExpExecutor::ReAllocStack(uint32_t stack_len) -{ - auto allocator = Runtime::GetCurrent()->GetInternalAllocator(); - if (stack_len > state_stack_size_) { - uint32_t new_stack_size = std::max(state_stack_size_ * 2, MIN_STACK_SIZE); // 2: double the size - uint32_t stack_byte_size = new_stack_size * state_size_; - // NOLINTNEXTLINE(modernize-avoid-c-arrays) - auto new_stack = allocator->New(stack_byte_size); - if (memset_s(new_stack, stack_byte_size, 0, stack_byte_size) != EOK) { - LOG_ECMA(FATAL) << "memset_s failed"; - UNREACHABLE(); - } - if (state_stack_ != nullptr) { - size_t stack_size = state_stack_size_ * state_size_; - if (memcpy_s(new_stack, stack_size, state_stack_, stack_size) != EOK) { - return; - } - } - allocator->DeleteArray(state_stack_); - state_stack_ = new_stack; - state_stack_size_ = new_stack_size; - } -} } // namespace panda::ecmascript diff --git a/runtime/regexp/regexp_executor.h b/runtime/regexp/regexp_executor.h index 9f24af4df6e4bd86b3f8c6b4199a7cb90974f5fd..910d59f77de87f14188ffad59a5b21b34ad043b2 100644 --- a/runtime/regexp/regexp_executor.h +++ b/runtime/regexp/regexp_executor.h @@ -16,706 +16,15 @@ #ifndef ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H #define ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H -#include "plugins/ecmascript/runtime/regexp/regexp_parser.h" +#include "runtime/regexp/ecmascript/regexp_executor.h" +#include "runtime/regexp/ecmascript/regexp_parser.h" + +#include "plugins/ecmascript/runtime/base/string_helper.h" namespace panda::ecmascript { -class RegExpExecutor { +class RegExpExecutor : public panda::RegExpExecutor { public: - struct CaptureState { - const uint8_t *capture_start; - const uint8_t *capture_end; - }; - - enum StateType : uint8_t { - STATE_SPLIT = 0, - STATE_MATCH_AHEAD, - STATE_NEGATIVE_MATCH_AHEAD, - }; - - struct RegExpState { - StateType type = STATE_SPLIT; - uint32_t current_pc = 0; - uint32_t current_stack 
= 0; - const uint8_t *current_ptr = nullptr; - __extension__ CaptureState *capture_result_list[0]; // NOLINT(modernize-avoid-c-arrays) - }; - - struct MatchResult { - uint32_t end_index = 0; - uint32_t index = 0; - // first value is true if result is undefined - std::vector>> captures; - bool is_success = false; - }; - - explicit RegExpExecutor() = default; - - ~RegExpExecutor() - { - auto allocator = Runtime::GetCurrent()->GetInternalAllocator(); - allocator->DeleteArray(stack_); - allocator->DeleteArray(capture_result_list_); - allocator->DeleteArray(state_stack_); - } - - NO_COPY_SEMANTIC(RegExpExecutor); - NO_MOVE_SEMANTIC(RegExpExecutor); - - bool Execute(const uint8_t *input, uint32_t last_index, uint32_t length, uint8_t *buf, bool is_wide_char = false); - - bool ExecuteInternal(const DynChunk &byte_code, uint32_t pc_end); - inline bool HandleFirstSplit() - { - if (GetCurrentPC() == RegExpParser::OP_START_OFFSET && state_stack_len_ == 0 && - (flags_ & RegExpParser::FLAG_STICKY) == 0) { - if (IsEOF()) { - if (MatchFailed()) { - return false; - } - } else { - AdvanceCurrentPtr(); - PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET); - } - } - return true; - } - - inline bool HandleOpAll(uint8_t op_code) - { - if (IsEOF()) { - return !MatchFailed(); - } - uint32_t current_char = GetCurrentChar(); - if ((op_code == RegExpOpCode::OP_DOTS) && IsTerminator(current_char)) { - return !MatchFailed(); - } - Advance(op_code); - return true; - } - - inline bool HandleOpChar(const DynChunk &byte_code, uint8_t op_code) - { - uint32_t expected_char; - if (op_code == RegExpOpCode::OP_CHAR32) { - expected_char = byte_code.GetU32(GetCurrentPC() + 1); - } else { - expected_char = byte_code.GetU16(GetCurrentPC() + 1); - } - if (IsEOF()) { - return !MatchFailed(); - } - uint32_t current_char = GetCurrentChar(); - if (IsIgnoreCase()) { - current_char = static_cast(RegExpParser::Canonicalize(current_char, IsUtf16())); - } - if (current_char == expected_char) { - 
Advance(op_code); - } else { - if (MatchFailed()) { - return false; - } - } - return true; - } - - inline bool HandleOpWordBoundary(uint8_t op_code) - { - if (IsEOF()) { - if (op_code == RegExpOpCode::OP_WORD_BOUNDARY) { - Advance(op_code); - } else { - if (MatchFailed()) { - return false; - } - } - return true; - } - bool pre_is_word = false; - if (GetCurrentPtr() != input_) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - pre_is_word = IsWordChar(PeekPrevChar(current_ptr_, input_)); - } - bool current_is_word = IsWordChar(PeekChar(current_ptr_, input_end_)); - if (((op_code == RegExpOpCode::OP_WORD_BOUNDARY) && - ((!pre_is_word && current_is_word) || (pre_is_word && !current_is_word))) || - ((op_code == RegExpOpCode::OP_NOT_WORD_BOUNDARY) && - ((pre_is_word && current_is_word) || (!pre_is_word && !current_is_word)))) { - Advance(op_code); - } else { - if (MatchFailed()) { - return false; - } - } - return true; - } - - inline bool HandleOpLineStart(uint8_t op_code) - { - if (IsEOF()) { - return !MatchFailed(); - } - if ((GetCurrentPtr() == input_) || - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekPrevChar(current_ptr_, input_) == '\n')) { - Advance(op_code); - } else { - if (MatchFailed()) { - return false; - } - } - return true; - } - - inline bool HandleOpLineEnd(uint8_t op_code) - { - if (IsEOF() || - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekChar(current_ptr_, input_end_) == '\n')) { - Advance(op_code); - } else { - if (MatchFailed()) { - return false; - } - } - return true; - } - - inline void HandleOpSaveStart(const DynChunk &byte_code, uint8_t op_code) - { - uint32_t capture_index = byte_code.GetU8(GetCurrentPC() + 1); - ASSERT(capture_index < n_capture_); - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - CaptureState *capture_state = 
&capture_result_list_[capture_index]; - capture_state->capture_start = GetCurrentPtr(); - Advance(op_code); - } - - inline void HandleOpSaveEnd(const DynChunk &byte_code, uint8_t op_code) - { - uint32_t capture_index = byte_code.GetU8(GetCurrentPC() + 1); - ASSERT(capture_index < n_capture_); - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - CaptureState *capture_state = &capture_result_list_[capture_index]; - capture_state->capture_end = GetCurrentPtr(); - Advance(op_code); - } - - inline void HandleOpSaveReset(const DynChunk &byte_code, uint8_t op_code) - { - uint32_t catpure_start_index = byte_code.GetU8(GetCurrentPC() + SAVE_RESET_START); - uint32_t catpure_end_index = byte_code.GetU8(GetCurrentPC() + SAVE_RESET_END); - for (uint32_t i = catpure_start_index; i <= catpure_end_index; i++) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - CaptureState *capture_state = &capture_result_list_[i]; - capture_state->capture_start = nullptr; - capture_state->capture_end = nullptr; - } - Advance(op_code); - } - - inline void HandleOpMatch(const DynChunk &byte_code, uint8_t op_code) - { - auto type = static_cast(op_code - RegExpOpCode::OP_SPLIT_NEXT); - ASSERT(type == STATE_SPLIT || type == STATE_MATCH_AHEAD || type == STATE_NEGATIVE_MATCH_AHEAD); - uint32_t offset = byte_code.GetU32(GetCurrentPC() + 1); - Advance(op_code); - uint32_t split_pc = GetCurrentPC() + offset; - PushRegExpState(type, split_pc); - } - - inline void HandleOpSplitFirst(const DynChunk &byte_code, uint8_t op_code) - { - uint32_t offset = byte_code.GetU32(GetCurrentPC() + 1); - Advance(op_code); - PushRegExpState(STATE_SPLIT, GetCurrentPC()); - AdvanceOffset(offset); - } - - inline bool HandleOpPrev(uint8_t op_code) - { - if (GetCurrentPtr() == input_) { - if (MatchFailed()) { - return false; - } - } else { - PrevPtr(&current_ptr_, input_); - Advance(op_code); - } - return true; - } - - inline void HandleOpLoop(const DynChunk &byte_code, uint8_t op_code) - { - 
uint32_t quantify_min = byte_code.GetU32(GetCurrentPC() + LOOP_MIN_OFFSET); - uint32_t quantify_max = byte_code.GetU32(GetCurrentPC() + LOOP_MAX_OFFSET); - uint32_t pc_offset = byte_code.GetU32(GetCurrentPC() + LOOP_PC_OFFSET); - Advance(op_code); - uint32_t loop_pc_end = GetCurrentPC(); - uint32_t loop_pc_start = GetCurrentPC() + pc_offset; - bool is_greedy = op_code == RegExpOpCode::OP_LOOP_GREEDY; - uint32_t loop_max = is_greedy ? quantify_max : quantify_min; - - uint32_t loop_count = PeekStack(); - SetStackValue(++loop_count); - if (loop_count < loop_max) { - // greedy failed, goto next - if (loop_count >= quantify_min) { - PushRegExpState(STATE_SPLIT, loop_pc_end); - } - // Goto loop start - SetCurrentPC(loop_pc_start); - } else { - if (!is_greedy && (loop_count < quantify_max)) { - PushRegExpState(STATE_SPLIT, loop_pc_start); - } - } - } - - inline bool HandleOpRange32(const DynChunk &byte_code) - { - if (IsEOF()) { - return !MatchFailed(); - } - uint32_t current_char = GetCurrentChar(); - if (IsIgnoreCase()) { - current_char = static_cast(RegExpParser::Canonicalize(current_char, IsUtf16())); - } - uint16_t range_count = byte_code.GetU16(GetCurrentPC() + 1); - bool is_found = false; - int32_t idx_min = 0; - int32_t idx_max = static_cast(range_count) - 1; - int32_t idx = 0; - uint32_t low = 0; - uint32_t high = byte_code.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idx_max * RANGE32_MAX_OFFSET + - RANGE32_MAX_HALF_OFFSET); - if (current_char <= high) { - while (idx_min <= idx_max) { - idx = (idx_min + idx_max) / RANGE32_OFFSET; - low = byte_code.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + - static_cast(idx) * RANGE32_MAX_OFFSET); - high = byte_code.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + - static_cast(idx) * RANGE32_MAX_OFFSET + RANGE32_MAX_HALF_OFFSET); - if (current_char < low) { - idx_max = idx - 1; - } else if (current_char > high) { - idx_min = idx + 1; - } else { - is_found = true; - break; - } - } - } - if (is_found) { - 
AdvanceOffset(range_count * RANGE32_MAX_OFFSET + RANGE32_HEAD_OFFSET); - } else { - if (MatchFailed()) { - return false; - } - } - return true; - } - - inline bool HandleOpRange(const DynChunk &byte_code) - { - if (IsEOF()) { - return !MatchFailed(); - } - uint32_t current_char = GetCurrentChar(); - if (IsIgnoreCase()) { - current_char = static_cast(RegExpParser::Canonicalize(current_char, IsUtf16())); - } - uint16_t range_count = byte_code.GetU16(GetCurrentPC() + 1); - bool is_found = false; - int32_t idx_min = 0; - int32_t idx_max = range_count - 1; - int32_t idx = 0; - uint32_t low = 0; - uint32_t high = - byte_code.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + idx_max * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET); - if (current_char <= high) { - while (idx_min <= idx_max) { - idx = (idx_min + idx_max) / RANGE32_OFFSET; - low = byte_code.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + - static_cast(idx) * RANGE32_MAX_HALF_OFFSET); - high = byte_code.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + - static_cast(idx) * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET); - if (current_char < low) { - idx_max = idx - 1; - } else if (current_char > high) { - idx_min = idx + 1; - } else { - is_found = true; - break; - } - } - } - if (is_found) { - AdvanceOffset(range_count * RANGE32_MAX_HALF_OFFSET + RANGE32_HEAD_OFFSET); - } else { - if (MatchFailed()) { - return false; - } - } - return true; - } - - inline bool HandleOpBackReference(const DynChunk &byte_code, uint8_t op_code) - { - uint32_t capture_index = byte_code.GetU8(GetCurrentPC() + 1); - if (capture_index >= n_capture_) { - return !MatchFailed(); - } - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - const uint8_t *capture_start = capture_result_list_[capture_index].capture_start; - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - const uint8_t *capture_end = capture_result_list_[capture_index].capture_end; - if (capture_start == nullptr || capture_end == nullptr) { - 
Advance(op_code); - return true; - } - bool is_matched = true; - if (op_code == RegExpOpCode::OP_BACKREFERENCE) { - const uint8_t *ref_cptr = capture_start; - while (ref_cptr < capture_end) { - if (IsEOF()) { - is_matched = false; - break; - } - // NOLINTNEXTLINE(readability-identifier-naming) - uint32_t c1 = GetChar(&ref_cptr, capture_end); - // NOLINTNEXTLINE(readability-identifier-naming) - uint32_t c2 = GetChar(¤t_ptr_, input_end_); - if (IsIgnoreCase()) { - c1 = static_cast(RegExpParser::Canonicalize(c1, IsUtf16())); - c2 = static_cast(RegExpParser::Canonicalize(c2, IsUtf16())); - } - if (c1 != c2) { - is_matched = false; - break; - } - } - if (!is_matched) { - if (MatchFailed()) { - return false; - } - } else { - Advance(op_code); - } - } else { - const uint8_t *ref_cptr = capture_end; - while (ref_cptr > capture_start) { - if (GetCurrentPtr() == input_) { - is_matched = false; - break; - } - // NOLINTNEXTLINE(readability-identifier-naming) - uint32_t c1 = GetPrevChar(&ref_cptr, capture_start); - // NOLINTNEXTLINE(readability-identifier-naming) - uint32_t c2 = GetPrevChar(¤t_ptr_, input_); - if (IsIgnoreCase()) { - c1 = static_cast(RegExpParser::Canonicalize(c1, IsUtf16())); - c2 = static_cast(RegExpParser::Canonicalize(c2, IsUtf16())); - } - if (c1 != c2) { - is_matched = false; - break; - } - } - if (!is_matched) { - if (MatchFailed()) { - return false; - } - } else { - Advance(op_code); - } - } - return true; - } - - inline void Advance(uint8_t op_code, uint32_t offset = 0) - { - current_pc_ += offset + static_cast(RegExpOpCode::GetRegExpOpCode(op_code)->GetSize()); - } - - inline void AdvanceOffset(uint32_t offset) - { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - current_pc_ += offset; - } - - inline uint32_t GetCurrentChar() - { - return GetChar(¤t_ptr_, input_end_); - } - - inline void AdvanceCurrentPtr() - { - AdvancePtr(¤t_ptr_, input_end_); - } - - uint32_t GetChar(const uint8_t **pp, const uint8_t *end) const - { - uint32_t 
c; - const uint8_t *cptr = *pp; - if (!is_wide_char_) { - c = *cptr; - *pp += 1; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - } else { - uint16_t c1 = *(reinterpret_cast(cptr)); - c = c1; - cptr += WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) { - c1 = *(reinterpret_cast(cptr)); - if (U16_IS_TRAIL(c1)) { - c = static_cast(U16_GET_SUPPLEMENTARY(c, c1)); // NOLINT(hicpp-signed-bitwise) - cptr += WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - } - } - *pp = cptr; - } - return c; - } - - uint32_t PeekChar(const uint8_t *p, const uint8_t *end) const - { - uint32_t c; - const uint8_t *cptr = p; - if (!is_wide_char_) { - c = *cptr; - } else { - uint16_t c1 = *reinterpret_cast(cptr); - c = c1; - cptr += WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) { - c1 = *reinterpret_cast(cptr); - if (U16_IS_TRAIL(c1)) { - c = static_cast(U16_GET_SUPPLEMENTARY(c, c1)); // NOLINT(hicpp-signed-bitwise) - } - } - } - return c; - } - - void AdvancePtr(const uint8_t **pp, const uint8_t *end) const - { - const uint8_t *cptr = *pp; - if (!is_wide_char_) { - *pp += 1; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - } else { - uint16_t c1 = *reinterpret_cast(cptr); - cptr += WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - if (U16_IS_LEAD(c1) && IsUtf16() && cptr < end) { - c1 = *reinterpret_cast(cptr); - if (U16_IS_TRAIL(c1)) { - cptr += WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - } - } - *pp = cptr; - } - } - - uint32_t PeekPrevChar(const uint8_t *p, const uint8_t *start) const - { - uint32_t c; - const uint8_t *cptr = p; - if (!is_wide_char_) { - c = cptr[-1]; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - } else { - cptr -= WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - uint16_t 
c1 = *reinterpret_cast(cptr); - c = c1; - if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - c1 = reinterpret_cast(cptr)[-1]; - if (U16_IS_LEAD(c1)) { - c = static_cast(U16_GET_SUPPLEMENTARY(c1, c)); // NOLINT(hicpp-signed-bitwise) - } - } - } - return c; - } - - uint32_t GetPrevChar(const uint8_t **pp, const uint8_t *start) const - { - uint32_t c; - const uint8_t *cptr = *pp; - if (!is_wide_char_) { - c = cptr[-1]; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - cptr -= 1; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - *pp = cptr; - } else { - cptr -= WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - uint16_t c1 = *reinterpret_cast(cptr); - c = c1; - if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - c1 = reinterpret_cast(cptr)[-1]; - if (U16_IS_LEAD(c1)) { - c = static_cast(U16_GET_SUPPLEMENTARY(c1, c)); // NOLINT(hicpp-signed-bitwise) - cptr -= WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - } - } - *pp = cptr; - } - return c; - } - - void PrevPtr(const uint8_t **pp, const uint8_t *start) const - { - const uint8_t *cptr = *pp; - if (!is_wide_char_) { - cptr -= 1; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - *pp = cptr; - } else { - cptr -= WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - uint16_t c1 = *reinterpret_cast(cptr); - if (U16_IS_TRAIL(c1) && IsUtf16() && cptr > start) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - c1 = reinterpret_cast(cptr)[-1]; - if (U16_IS_LEAD(c1)) { - cptr -= WIDE_CHAR_SIZE; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - } - } - *pp = cptr; - } - } - - bool MatchFailed(bool is_matched = false); - - void SetCurrentPC(uint32_t pc) - { - current_pc_ = pc; - } - - void SetCurrentPtr(const uint8_t *ptr) - { - current_ptr_ = ptr; - 
} - - bool IsEOF() const - { - return current_ptr_ >= input_end_; - } - - uint32_t GetCurrentPC() const - { - return current_pc_; - } - - void PushStack(uintptr_t val) - { - ASSERT(current_stack_ < n_stack_); - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - stack_[current_stack_++] = val; - } - - void SetStackValue(uintptr_t val) const - { - ASSERT(current_stack_ >= 1); - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - stack_[current_stack_ - 1] = val; - } - - uintptr_t PopStack() - { - ASSERT(current_stack_ >= 1); - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - return stack_[--current_stack_]; - } - - uintptr_t PeekStack() const - { - ASSERT(current_stack_ >= 1); - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - return stack_[current_stack_ - 1]; - } - - const uint8_t *GetCurrentPtr() const - { - return current_ptr_; - } - - CaptureState *GetCaptureResultList() const - { - return capture_result_list_; - } - - void DumpResult(std::ostream &out) const; - - MatchResult GetResult(const JSThread *thread, bool is_success) const; - - void PushRegExpState(StateType type, uint32_t pc); - - RegExpState *PopRegExpState(bool copy_captrue = true); - - void DropRegExpState() - { - state_stack_len_--; - } - - RegExpState *PeekRegExpState() const - { - ASSERT(state_stack_len_ >= 1); - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - return reinterpret_cast(state_stack_ + (state_stack_len_ - 1) * state_size_); - } - - void ReAllocStack(uint32_t stack_len); - - inline bool IsWordChar(uint8_t value) const - { - return ((value >= '0' && value <= '9') || (value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z') || - (value == '_')); - } - - inline bool IsTerminator(uint32_t value) const - { - // NOLINTNEXTLINE(readability-magic-numbers) - return (value == '\n' || value == '\r' || value == 0x2028 || value == 0x2029); - } - - inline bool IsIgnoreCase() const - { - return 
(flags_ & RegExpParser::FLAG_IGNORECASE) != 0; - } - - inline bool IsUtf16() const - { - return (flags_ & RegExpParser::FLAG_UTF16) != 0; - } - -private: - static constexpr size_t CHAR_SIZE = 1; - static constexpr size_t WIDE_CHAR_SIZE = 2; - static constexpr size_t SAVE_RESET_START = 1; - static constexpr size_t SAVE_RESET_END = 2; - static constexpr size_t LOOP_MIN_OFFSET = 5; - static constexpr size_t LOOP_MAX_OFFSET = 9; - static constexpr size_t LOOP_PC_OFFSET = 1; - static constexpr size_t RANGE32_HEAD_OFFSET = 3; - static constexpr size_t RANGE32_MAX_HALF_OFFSET = 4; - static constexpr size_t RANGE32_MAX_OFFSET = 8; - static constexpr size_t RANGE32_OFFSET = 2; - static constexpr uint32_t STACK_MULTIPLIER = 2; - static constexpr uint32_t MIN_STACK_SIZE = 8; - uint8_t *input_ = nullptr; - uint8_t *input_end_ = nullptr; - bool is_wide_char_ = false; - - uint32_t current_pc_ = 0; - const uint8_t *current_ptr_ = nullptr; - CaptureState *capture_result_list_ = nullptr; - uintptr_t *stack_ = nullptr; - uint32_t current_stack_ = 0; - - uint32_t n_capture_ = 0; - uint32_t n_stack_ = 0; - - uint32_t flags_ = 0; - uint32_t state_stack_len_ = 0; - uint32_t state_stack_size_ = 0; - uint32_t state_size_ = 0; - uint8_t *state_stack_ = nullptr; + RegExpMatchResult> GetResult(const JSThread *thread, bool is_success) const; }; } // namespace panda::ecmascript #endif // ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H diff --git a/runtime/regexp/regexp_opcode.cpp b/runtime/regexp/regexp_opcode.cpp deleted file mode 100644 index cd9ff09339fbe95ca4096106aacb8da9c5b70a2a..0000000000000000000000000000000000000000 --- a/runtime/regexp/regexp_opcode.cpp +++ /dev/null @@ -1,663 +0,0 @@ -/* - * Copyright (c) 2021 Huawei Device Co., Ltd. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "plugins/ecmascript/runtime/regexp/regexp_opcode.h" - -#include "plugins/ecmascript/runtime/regexp/regexp_executor.h" - -namespace panda::ecmascript { -using CaptureState = RegExpExecutor::CaptureState; - -static SaveStartOpCode G_SAVE_START_OPCODE = SaveStartOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -static SaveEndOpCode G_SAVE_END_OPCODE = SaveEndOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -static CharOpCode G_CHAR_OPCODE = CharOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -static GotoOpCode G_GOTO_OPCODE = GotoOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -static SplitNextOpCode G_SPLIT_NEXT_OPCODE = SplitNextOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -static SplitFirstOpCode G_SPLIT_FIRST_OPCODE = SplitFirstOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -static MatchOpCode G_MATCH_OPCODE = MatchOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -static LoopOpCode G_LOOP_OPCODE = LoopOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -static LoopGreedyOpCode G_LOOP_GREEDY_OPCODE = LoopGreedyOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -static PushCharOpCode G_PUSH_CHAR_OPCODE = PushCharOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -static CheckCharOpCode G_CHECK_CHAR_OPCODE = CheckCharOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -static PushOpCode G_PUSH_OPCODE = PushOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -static PopOpCode G_POP_OPCODE = PopOpCode(); // 
NOLINT(fuchsia-statically-constructed-objects) -static SaveResetOpCode G_SAVE_RESET_OPCODE = SaveResetOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -static LineStartOpCode G_LINE_START_OPCODE = LineStartOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -static LineEndOpCode G_LINE_END_OPCODE = LineEndOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static WordBoundaryOpCode G_WORD_BOUNDARY_OPCODE = WordBoundaryOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static NotWordBoundaryOpCode G_NOT_WORD_BOUNDARY_OPCODE = NotWordBoundaryOpCode(); -static AllOpCode G_ALL_OPCODE = AllOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -static DotsOpCode G_DOTS_OPCODE = DotsOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -static MatchAheadOpCode G_MATCH_AHEAD_OPCODE = MatchAheadOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static NegativeMatchAheadOpCode G_NEGATIVE_MATCH_AHEAD_OPCODE = NegativeMatchAheadOpCode(); -static MatchEndOpCode G_MATCH_END_OPCODE = MatchEndOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -static PrevOpCode G_PREV_OPCODE = PrevOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -static RangeOpCode G_RANGE_OPCODE = RangeOpCode(); // NOLINT(fuchsia-statically-constructed-objects) -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static BackReferenceOpCode G_BACKREFERENCE_OPCODE = BackReferenceOpCode(); -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static BackwardBackReferenceOpCode G_BACKWARD_BACKREFERENCE_OPCODE = BackwardBackReferenceOpCode(); -static Char32OpCode G_CHAR32_OPCODE = Char32OpCode(); // NOLINT(fuchsia-statically-constructed-objects) -static Range32OpCode G_RANGE32_OPCODE = Range32OpCode(); // NOLINT(fuchsia-statically-constructed-objects) -// 
NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static std::vector G_INTRINSIC_SET = { - &G_SAVE_START_OPCODE, - &G_SAVE_END_OPCODE, - &G_CHAR_OPCODE, - &G_GOTO_OPCODE, - &G_SPLIT_FIRST_OPCODE, - &G_SPLIT_NEXT_OPCODE, - &G_MATCH_AHEAD_OPCODE, - &G_NEGATIVE_MATCH_AHEAD_OPCODE, - &G_MATCH_OPCODE, - &G_LOOP_OPCODE, - &G_LOOP_GREEDY_OPCODE, - &G_PUSH_CHAR_OPCODE, - &G_CHECK_CHAR_OPCODE, - &G_PUSH_OPCODE, - &G_POP_OPCODE, - &G_SAVE_RESET_OPCODE, - &G_LINE_START_OPCODE, - &G_LINE_END_OPCODE, - &G_WORD_BOUNDARY_OPCODE, - &G_NOT_WORD_BOUNDARY_OPCODE, - &G_ALL_OPCODE, - &G_DOTS_OPCODE, - &G_MATCH_END_OPCODE, - &G_PREV_OPCODE, - &G_RANGE_OPCODE, - &G_BACKREFERENCE_OPCODE, - &G_BACKWARD_BACKREFERENCE_OPCODE, - &G_CHAR32_OPCODE, - &G_RANGE32_OPCODE, -}; - -RegExpOpCode::RegExpOpCode(uint8_t op_code, int size) : op_code_(op_code), size_(size) {} - -/* static */ -RegExpOpCode *RegExpOpCode::GetRegExpOpCode(const DynChunk &buf, int pc) -{ - uint8_t op_code = buf.GetU8(pc); - ASSERT_PRINT(op_code <= G_INTRINSIC_SET.size(), "invalid op code"); - return G_INTRINSIC_SET.at(op_code); -} - -/* static */ -RegExpOpCode *RegExpOpCode::GetRegExpOpCode(uint8_t op_code) -{ - ASSERT_PRINT(op_code <= G_INTRINSIC_SET.size(), "invalid op code"); - return G_INTRINSIC_SET.at(op_code); -} - -/* static */ -void RegExpOpCode::DumpRegExpOpCode(std::ostream &out, const DynChunk &buf) -{ - out << "OpCode:\t" << std::endl; - uint32_t pc = RegExpParser::OP_START_OFFSET; - do { - RegExpOpCode *byte_code = GetRegExpOpCode(buf, pc); - pc = byte_code->DumpOpCode(out, buf, pc); - } while (pc < buf.size_); -} - -uint32_t SaveStartOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const -{ - auto capture = static_cast(para & 0xffU); // NOLINT(readability-magic-numbers) - buf->EmitChar(GetOpCode()); - buf->EmitChar(capture); - return GetDynChunkfSize(*buf); -} - -uint32_t SaveStartOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << 
"save_start\t" << buf.GetU8(offset + 1) << std::endl; - return offset + GetSize(); -} - -uint32_t SaveEndOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const -{ - auto capture = static_cast(para & 0xffU); // NOLINT(readability-magic-numbers) - buf->EmitChar(GetOpCode()); - buf->EmitChar(capture); - return GetDynChunkfSize(*buf); -} - -uint32_t SaveEndOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "save_end\t" << buf.GetU8(offset + 1) << std::endl; - return offset + GetSize(); -} - -uint32_t CharOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const -{ - auto para_char = static_cast(para & 0xffffU); // NOLINT(readability-magic-numbers) - buf->EmitChar(GetOpCode()); - buf->EmitU16(para_char); - return GetDynChunkfSize(*buf); -} - -uint32_t CharOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "char\t" << static_cast(buf.GetU16(offset + 1)) << std::endl; - return offset + GetSize(); -} - -uint32_t Char32OpCode::EmitOpCode(DynChunk *buf, uint32_t para) const -{ - buf->EmitChar(GetOpCode()); - buf->EmitU32(para); - return GetDynChunkfSize(*buf); -} - -uint32_t Char32OpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "char32\t" << static_cast(buf.GetU32(offset + 1)) << std::endl; - return offset + GetSize(); -} - -uint32_t GotoOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const -{ - buf->EmitChar(GetOpCode()); - buf->EmitU32(para); - return GetDynChunkfSize(*buf); -} - -void GotoOpCode::UpdateOpPara(DynChunk *buf, uint32_t offset, uint32_t para) const -{ - buf->PutU32(offset + 1, para); -} - -uint32_t GotoOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "goto\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl; - return offset + GetSize(); -} - -uint32_t SplitNextOpCode::InsertOpCode(DynChunk 
*buf, uint32_t offset, uint32_t para) const -{ - buf->Insert(offset, GetSize()); - buf->PutU8(offset, GetOpCode()); - buf->PutU32(offset + 1, para); - return GetDynChunkfSize(*buf); -} - -uint32_t SplitNextOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "split_next\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl; - return offset + GetSize(); -} - -uint32_t SplitFirstOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const -{ - buf->Insert(offset, GetSize()); - buf->PutU8(offset, GetOpCode()); - buf->PutU32(offset + 1, para); - return GetDynChunkfSize(*buf); -} - -uint32_t SplitFirstOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "split_first\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl; - return offset + GetSize(); -} - -uint32_t LoopOpCode::EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const -{ - buf->EmitChar(GetOpCode()); - buf->EmitU32(start); - buf->EmitU32(min); - buf->EmitU32(max); - return GetDynChunkfSize(*buf); -} - -uint32_t LoopOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "loop\t" << buf.GetU32(offset + 1) + offset + GetSize() << "\t" - << buf.GetU32(offset + RegExpOpCode::OP_SIZE_FIVE) << "\t" << buf.GetU32(offset + RegExpOpCode::OP_SIZE_NINE) - << std::endl; - return offset + GetSize(); -} - -uint32_t LoopGreedyOpCode::EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const -{ - buf->EmitChar(GetOpCode()); - buf->EmitU32(start); - buf->EmitU32(min); - buf->EmitU32(max); - return GetDynChunkfSize(*buf); -} - -uint32_t LoopGreedyOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "greedy_loop\t" << buf.GetU32(offset + 1) + offset + GetSize() << "\t" - << buf.GetU32(offset + 
RegExpOpCode::OP_SIZE_FIVE) << "\t" << buf.GetU32(offset + RegExpOpCode::OP_SIZE_NINE) - << std::endl; - return offset + GetSize(); -} - -uint32_t PushCharOpCode::InsertOpCode(DynChunk *buf, uint32_t offset) const -{ - buf->Insert(offset, GetSize()); - buf->PutU8(offset, GetOpCode()); - return GetDynChunkfSize(*buf); -} - -uint32_t PushCharOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "push_char" << std::endl; - return offset + GetSize(); -} - -uint32_t PushOpCode::InsertOpCode(DynChunk *buf, uint32_t offset) const -{ - buf->Insert(offset, GetSize()); - buf->PutU8(offset, GetOpCode()); - return GetDynChunkfSize(*buf); -} - -uint32_t PushOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "push" << std::endl; - return offset + GetSize(); -} - -uint32_t PopOpCode::EmitOpCode(DynChunk *buf) const -{ - buf->EmitChar(GetOpCode()); - return GetDynChunkfSize(*buf); -} - -uint32_t PopOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "pop" << std::endl; - return offset + GetSize(); -} - -uint32_t CheckCharOpCode::EmitOpCode(DynChunk *buf, uint32_t offset) const -{ - buf->EmitChar(GetOpCode()); - buf->EmitU32(offset); - return GetDynChunkfSize(*buf); -} - -uint32_t CheckCharOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "check_char\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl; - return offset + GetSize(); -} - -uint32_t SaveResetOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t start, uint32_t end) const -{ - auto capture_start = static_cast(start & 0xffU); // NOLINT(readability-magic-numbers) - auto capture_end = static_cast(end & 0xffU); // NOLINT(readability-magic-numbers) - buf->Insert(offset, GetSize()); - buf->PutU8(offset, 
GetOpCode()); - buf->PutU8(offset + RegExpOpCode::OP_SIZE_ONE, capture_start); - buf->PutU8(offset + RegExpOpCode::OP_SIZE_TWO, capture_end); - return GetDynChunkfSize(*buf); -} - -uint32_t SaveResetOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "save_reset\t" << buf.GetU8(offset + RegExpOpCode::OP_SIZE_ONE) << "\t" - << buf.GetU8(offset + RegExpOpCode::OP_SIZE_TWO) << std::endl; - return offset + GetSize(); -} - -uint32_t MatchOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const -{ - buf->EmitChar(GetOpCode()); - return GetDynChunkfSize(*buf); -} - -uint32_t MatchOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "match" << std::endl; - return offset + GetSize(); -} - -uint32_t MatchEndOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const -{ - buf->EmitChar(GetOpCode()); - return GetDynChunkfSize(*buf); -} - -uint32_t MatchEndOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "match_end" << std::endl; - return offset + GetSize(); -} - -uint32_t LineStartOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const -{ - buf->EmitChar(GetOpCode()); - return GetDynChunkfSize(*buf); -} - -uint32_t LineStartOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "line_start" << std::endl; - return offset + GetSize(); -} - -uint32_t LineEndOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const -{ - buf->EmitChar(GetOpCode()); - return GetDynChunkfSize(*buf); -} - -uint32_t LineEndOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "line_end" << std::endl; - return offset + GetSize(); -} - -uint32_t 
WordBoundaryOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const -{ - buf->EmitChar(GetOpCode()); - return GetDynChunkfSize(*buf); -} - -uint32_t WordBoundaryOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "word_boundary" << std::endl; - return offset + GetSize(); -} - -uint32_t NotWordBoundaryOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const -{ - buf->EmitChar(GetOpCode()); - return GetDynChunkfSize(*buf); -} - -uint32_t NotWordBoundaryOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, - uint32_t offset) const -{ - out << offset << ":\t" - << "not_word_boundary" << std::endl; - return offset + GetSize(); -} - -uint32_t AllOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const -{ - buf->EmitChar(GetOpCode()); - return GetDynChunkfSize(*buf); -} - -uint32_t AllOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "all" << std::endl; - return offset + GetSize(); -} - -uint32_t DotsOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const -{ - buf->EmitChar(GetOpCode()); - return GetDynChunkfSize(*buf); -} - -uint32_t DotsOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "dots" << std::endl; - return offset + GetSize(); -} - -uint32_t MatchAheadOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "match_ahead\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl; - return offset + GetSize(); -} - -uint32_t RangeOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "range\t"; - size_t size = buf.GetU16(offset + 1); - for (size_t i = 0; i < size; i++) { - out << buf.GetU16(offset + 
RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_FOUR)) << "\t" - << buf.GetU16(offset + RegExpOpCode::OP_SIZE_THREE + - (i * RegExpOpCode::OP_SIZE_FOUR + RegExpOpCode::OP_SIZE_TWO)) - << "\t"; - } - out << std::endl; - return offset + size * RegExpOpCode::OP_SIZE_FOUR + RegExpOpCode::OP_SIZE_THREE; -} - -uint32_t RangeOpCode::InsertOpCode(DynChunk *buf, const RangeSet &range_set) const -{ - buf->EmitChar(GetOpCode()); - size_t size = range_set.range_set_.size(); - buf->EmitU16(size); - for (auto range : range_set.range_set_) { - buf->EmitU16(range.first); - buf->EmitU16(range.second); - } - return GetDynChunkfSize(*buf); -} - -uint32_t Range32OpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "range32\t"; - size_t size = buf.GetU16(offset + 1); - for (size_t i = 0; i < size; i++) { - out << buf.GetU32(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_EIGHT)) << "\t" - << buf.GetU32(offset + RegExpOpCode::OP_SIZE_THREE + - (i * RegExpOpCode::OP_SIZE_EIGHT + RegExpOpCode::OP_SIZE_FOUR)) - << "\t"; - } - out << std::endl; - return offset + size * +RegExpOpCode::OP_SIZE_EIGHT + RegExpOpCode::OP_SIZE_THREE; -} - -uint32_t Range32OpCode::InsertOpCode(DynChunk *buf, const RangeSet &range_set) const -{ - buf->EmitChar(GetOpCode()); - size_t size = range_set.range_set_.size(); - buf->EmitU16(size); - for (auto range : range_set.range_set_) { - buf->EmitU32(range.first); - buf->EmitU32(range.second); - } - return GetDynChunkfSize(*buf); -} - -uint32_t MatchAheadOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const -{ - buf->Insert(offset, GetSize()); - buf->PutU8(offset, GetOpCode()); - buf->PutU32(offset + 1, para); - return GetDynChunkfSize(*buf); -} - -uint32_t NegativeMatchAheadOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "negative_match_ahead\t" << buf.GetU32(offset + 1) + offset + GetSize() << 
std::endl; - return offset + GetSize(); -} - -uint32_t NegativeMatchAheadOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const -{ - buf->Insert(offset, GetSize()); - buf->PutU8(offset, GetOpCode()); - buf->PutU32(offset + 1, para); - return GetDynChunkfSize(*buf); -} - -uint32_t PrevOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const -{ - buf->EmitChar(GetOpCode()); - return GetDynChunkfSize(*buf); -} - -uint32_t PrevOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "prev" << std::endl; - return offset + GetSize(); -} - -uint32_t BackReferenceOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const -{ - auto capture = static_cast(para & 0xffU); // NOLINT(readability-magic-numbers) - buf->EmitChar(GetOpCode()); - buf->EmitChar(capture); - return GetDynChunkfSize(*buf); -} - -uint32_t BackReferenceOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "backreference\t" << buf.GetU8(offset + 1) << std::endl; - return offset + GetSize(); -} - -uint32_t BackwardBackReferenceOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const -{ - auto capture = static_cast(para & 0xffU); // NOLINT(readability-magic-numbers) - buf->EmitChar(GetOpCode()); - buf->EmitChar(capture); - return GetDynChunkfSize(*buf); -} - -uint32_t BackwardBackReferenceOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const -{ - out << offset << ":\t" - << "backward_backreference\t" << buf.GetU8(offset + 1) << std::endl; - return offset + GetSize(); -} - -void RangeSet::Insert(uint32_t start, uint32_t end) -{ - if (start > end) { - return; - } - std::pair pair_element = std::make_pair(start, end); - if (range_set_.empty()) { - range_set_.emplace_back(pair_element); - } else { - for (auto iter = range_set_.begin(); iter != range_set_.end(); iter++) { - if (IsIntersect(start, end, iter->first, 
iter->second) || - IsAdjacent(start, end, iter->first, iter->second)) { - iter->first = std::min(iter->first, start); - iter->second = std::max(iter->second, end); - return; - } - if (iter->first > end) { - range_set_.insert(iter, pair_element); - return; - } - } - range_set_.emplace_back(pair_element); - } -} - -void RangeSet::Insert(const RangeSet &s1) -{ - if (s1.range_set_.empty()) { - return; - } - if (range_set_.empty()) { - range_set_ = s1.range_set_; - } else { - for (auto range : s1.range_set_) { - Insert(range.first, range.second); - } - Compress(); - } -} - -void RangeSet::Invert(bool is_utf16) -{ - uint32_t max_value = is_utf16 ? UINT32_MAX : UINT16_MAX; - if (range_set_.empty()) { - range_set_.emplace_back(std::make_pair(0, max_value)); - return; - } - - auto iter = range_set_.begin(); - auto iter2 = range_set_.begin(); - if (iter->first == 0 && iter->second == max_value) { - range_set_.clear(); - return; - } - iter2++; - - uint32_t first = iter->first; - - for (iter = range_set_.begin(); iter != range_set_.end(); iter++) { - if (iter->second == max_value) { - range_set_.erase(iter); - break; - } - iter->first = iter->second + 1; - if (iter2 != range_set_.end()) { - iter->second = iter2->first - 1; - iter2++; - } else { - iter->second = max_value; - } - } - if (first > 0) { - std::pair pair1 = std::make_pair(0, first - 1); - range_set_.push_front(pair1); - } - Compress(); -} - -void RangeSet::Compress() -{ - auto iter = range_set_.begin(); - auto iter2 = range_set_.begin(); - iter2++; - while (iter2 != range_set_.end()) { - if (IsIntersect(iter->first, iter->second, iter2->first, iter2->second) || - IsAdjacent(iter->first, iter->second, iter2->first, iter2->second)) { - iter->first = std::min(iter->first, iter2->first); - iter->second = std::max(iter->second, iter2->second); - iter2 = range_set_.erase(iter2); - } else { - iter++; - iter2++; - } - } -} -} // namespace panda::ecmascript diff --git a/runtime/regexp/regexp_opcode.h 
b/runtime/regexp/regexp_opcode.h deleted file mode 100644 index e33961a5583974eecae4f29c3e424c1c5ca6c903..0000000000000000000000000000000000000000 --- a/runtime/regexp/regexp_opcode.h +++ /dev/null @@ -1,449 +0,0 @@ -/* - * Copyright (c) 2021 Huawei Device Co., Ltd. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ECMASCRIPT_REGEXP_OPCODE_H -#define ECMASCRIPT_REGEXP_OPCODE_H - -#include - -#include "plugins/ecmascript/runtime/mem/dyn_chunk.h" - -namespace panda::ecmascript { -class RegExpOpCode { -public: - enum : uint8_t { - OP_SAVE_START = 0U, - OP_SAVE_END, - OP_CHAR, - OP_GOTO, - OP_SPLIT_FIRST, - OP_SPLIT_NEXT, - OP_MATCH_AHEAD, - OP_NEGATIVE_MATCH_AHEAD, - OP_MATCH, - OP_LOOP, - OP_LOOP_GREEDY, - OP_PUSH_CHAR, - OP_CHECK_CHAR, - OP_PUSH, - OP_POP, - OP_SAVE_RESET, - OP_LINE_START, - OP_LINE_END, - OP_WORD_BOUNDARY, - OP_NOT_WORD_BOUNDARY, - OP_ALL, - OP_DOTS, - OP_MATCH_END, - OP_PREV, - OP_RANGE, - OP_BACKREFERENCE, - OP_BACKWARD_BACKREFERENCE, - OP_CHAR32, - OP_RANGE32, - OP_INVALID, - }; - - static constexpr size_t OP_SIZE_ONE = 1; - static constexpr size_t OP_SIZE_TWO = 2; - static constexpr size_t OP_SIZE_THREE = 3; - static constexpr size_t OP_SIZE_FOUR = 4; - static constexpr size_t OP_SIZE_FIVE = 5; - static constexpr size_t OP_SIZE_EIGHT = 8; - static constexpr size_t OP_SIZE_NINE = 9; - static constexpr size_t OP_SIZE_THIRTEEN = 13; - - RegExpOpCode(uint8_t op_code, int size); - NO_COPY_SEMANTIC(RegExpOpCode); - 
NO_MOVE_SEMANTIC(RegExpOpCode); - - virtual ~RegExpOpCode() = default; - static RegExpOpCode *GetRegExpOpCode(const DynChunk &buf, int pc_offset); - static RegExpOpCode *GetRegExpOpCode(uint8_t op_code); - static void DumpRegExpOpCode(std::ostream &out, const DynChunk &buf); - inline uint8_t GetSize() const - { - return size_; - } - inline uint8_t GetOpCode() const - { - return op_code_; - } - inline int GetDynChunkfSize(const DynChunk &buf) const - { - return buf.size_; - } - virtual uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const = 0; - -private: - uint8_t op_code_ {0}; - uint8_t size_ {0}; -}; - -class SaveStartOpCode : public RegExpOpCode { -public: - SaveStartOpCode() : RegExpOpCode(OP_SAVE_START, RegExpOpCode::OP_SIZE_TWO) {} - uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; - ~SaveStartOpCode() override = default; - NO_COPY_SEMANTIC(SaveStartOpCode); - NO_MOVE_SEMANTIC(SaveStartOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class SaveEndOpCode : public RegExpOpCode { -public: - SaveEndOpCode() : RegExpOpCode(OP_SAVE_END, RegExpOpCode::OP_SIZE_TWO) {} - uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; - ~SaveEndOpCode() override = default; - NO_COPY_SEMANTIC(SaveEndOpCode); - NO_MOVE_SEMANTIC(SaveEndOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class CharOpCode : public RegExpOpCode { -public: - CharOpCode() : RegExpOpCode(OP_CHAR, RegExpOpCode::OP_SIZE_THREE) {} - uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; - ~CharOpCode() override = default; - NO_COPY_SEMANTIC(CharOpCode); - NO_MOVE_SEMANTIC(CharOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class GotoOpCode : public RegExpOpCode { -public: - GotoOpCode() : RegExpOpCode(OP_GOTO, RegExpOpCode::OP_SIZE_FIVE) {} - uint32_t EmitOpCode(DynChunk *buf, uint32_t 
para) const; - void UpdateOpPara(DynChunk *buf, uint32_t offset, uint32_t para) const; - ~GotoOpCode() override = default; - NO_COPY_SEMANTIC(GotoOpCode); - NO_MOVE_SEMANTIC(GotoOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class SplitNextOpCode : public RegExpOpCode { -public: - SplitNextOpCode() : RegExpOpCode(OP_SPLIT_NEXT, RegExpOpCode::OP_SIZE_FIVE) {} - uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const; - ~SplitNextOpCode() override = default; - NO_COPY_SEMANTIC(SplitNextOpCode); - NO_MOVE_SEMANTIC(SplitNextOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class SplitFirstOpCode : public RegExpOpCode { -public: - SplitFirstOpCode() : RegExpOpCode(OP_SPLIT_FIRST, RegExpOpCode::OP_SIZE_FIVE) {} - uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const; - ~SplitFirstOpCode() override = default; - NO_COPY_SEMANTIC(SplitFirstOpCode); - NO_MOVE_SEMANTIC(SplitFirstOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class PushOpCode : public RegExpOpCode { -public: - PushOpCode() : RegExpOpCode(OP_PUSH, RegExpOpCode::OP_SIZE_ONE) {} - uint32_t InsertOpCode(DynChunk *buf, uint32_t offset) const; - ~PushOpCode() override = default; - NO_COPY_SEMANTIC(PushOpCode); - NO_MOVE_SEMANTIC(PushOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class PopOpCode : public RegExpOpCode { -public: - PopOpCode() : RegExpOpCode(OP_POP, RegExpOpCode::OP_SIZE_ONE) {} - uint32_t EmitOpCode(DynChunk *buf) const; - ~PopOpCode() override = default; - NO_COPY_SEMANTIC(PopOpCode); - NO_MOVE_SEMANTIC(PopOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class PushCharOpCode : public RegExpOpCode { -public: - PushCharOpCode() : 
RegExpOpCode(OP_PUSH_CHAR, RegExpOpCode::OP_SIZE_ONE) {} - uint32_t InsertOpCode(DynChunk *buf, uint32_t offset) const; - ~PushCharOpCode() override = default; - NO_COPY_SEMANTIC(PushCharOpCode); - NO_MOVE_SEMANTIC(PushCharOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class CheckCharOpCode : public RegExpOpCode { -public: - CheckCharOpCode() : RegExpOpCode(OP_CHECK_CHAR, RegExpOpCode::OP_SIZE_FIVE) {} - uint32_t EmitOpCode(DynChunk *buf, uint32_t offset) const; - ~CheckCharOpCode() override = default; - NO_COPY_SEMANTIC(CheckCharOpCode); - NO_MOVE_SEMANTIC(CheckCharOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class LoopOpCode : public RegExpOpCode { -public: - LoopOpCode() : RegExpOpCode(OP_LOOP, RegExpOpCode::OP_SIZE_THIRTEEN) {} - uint32_t EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const; - ~LoopOpCode() override = default; - NO_COPY_SEMANTIC(LoopOpCode); - NO_MOVE_SEMANTIC(LoopOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class LoopGreedyOpCode : public RegExpOpCode { -public: - LoopGreedyOpCode() : RegExpOpCode(OP_LOOP_GREEDY, RegExpOpCode::OP_SIZE_THIRTEEN) {} - uint32_t EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const; - ~LoopGreedyOpCode() override = default; - NO_COPY_SEMANTIC(LoopGreedyOpCode); - NO_MOVE_SEMANTIC(LoopGreedyOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class SaveResetOpCode : public RegExpOpCode { -public: - SaveResetOpCode() : RegExpOpCode(OP_SAVE_RESET, RegExpOpCode::OP_SIZE_THREE) {} - uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t start, uint32_t end) const; - ~SaveResetOpCode() override = default; - NO_COPY_SEMANTIC(SaveResetOpCode); - NO_MOVE_SEMANTIC(SaveResetOpCode); - uint32_t DumpOpCode(std::ostream &out, 
const DynChunk &buf, uint32_t offset) const override; -}; - -class MatchOpCode : public RegExpOpCode { -public: - MatchOpCode() : RegExpOpCode(OP_MATCH, RegExpOpCode::OP_SIZE_ONE) {} - uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; - ~MatchOpCode() override = default; - NO_COPY_SEMANTIC(MatchOpCode); - NO_MOVE_SEMANTIC(MatchOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class MatchEndOpCode : public RegExpOpCode { -public: - MatchEndOpCode() : RegExpOpCode(OP_MATCH_END, RegExpOpCode::OP_SIZE_ONE) {} - uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; - ~MatchEndOpCode() override = default; - NO_COPY_SEMANTIC(MatchEndOpCode); - NO_MOVE_SEMANTIC(MatchEndOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class LineStartOpCode : public RegExpOpCode { -public: - LineStartOpCode() : RegExpOpCode(OP_LINE_START, RegExpOpCode::OP_SIZE_ONE) {} - uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; - ~LineStartOpCode() override = default; - NO_COPY_SEMANTIC(LineStartOpCode); - NO_MOVE_SEMANTIC(LineStartOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class LineEndOpCode : public RegExpOpCode { -public: - LineEndOpCode() : RegExpOpCode(OP_LINE_END, RegExpOpCode::OP_SIZE_ONE) {} - uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; - ~LineEndOpCode() override = default; - NO_COPY_SEMANTIC(LineEndOpCode); - NO_MOVE_SEMANTIC(LineEndOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class WordBoundaryOpCode : public RegExpOpCode { -public: - WordBoundaryOpCode() : RegExpOpCode(OP_WORD_BOUNDARY, RegExpOpCode::OP_SIZE_ONE) {} - uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; - ~WordBoundaryOpCode() override = default; - NO_COPY_SEMANTIC(WordBoundaryOpCode); - NO_MOVE_SEMANTIC(WordBoundaryOpCode); - uint32_t 
DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class NotWordBoundaryOpCode : public RegExpOpCode { -public: - NotWordBoundaryOpCode() : RegExpOpCode(OP_NOT_WORD_BOUNDARY, RegExpOpCode::OP_SIZE_ONE) {} - uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; - ~NotWordBoundaryOpCode() override = default; - NO_COPY_SEMANTIC(NotWordBoundaryOpCode); - NO_MOVE_SEMANTIC(NotWordBoundaryOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class AllOpCode : public RegExpOpCode { -public: - AllOpCode() : RegExpOpCode(OP_ALL, RegExpOpCode::OP_SIZE_ONE) {} - uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; - ~AllOpCode() override = default; - NO_COPY_SEMANTIC(AllOpCode); - NO_MOVE_SEMANTIC(AllOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class DotsOpCode : public RegExpOpCode { -public: - DotsOpCode() : RegExpOpCode(OP_DOTS, RegExpOpCode::OP_SIZE_ONE) {} - uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; - ~DotsOpCode() override = default; - NO_COPY_SEMANTIC(DotsOpCode); - NO_MOVE_SEMANTIC(DotsOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class RangeSet { -public: - RangeSet() = default; - explicit RangeSet(uint32_t value) - { - Insert(value, value); - } - explicit RangeSet(uint32_t start, uint32_t end) - { - Insert(start, end); - } - explicit RangeSet(const std::list> &range_set) - { - range_set_ = range_set; - } - ~RangeSet() = default; - - inline bool IsIntersect(uint64_t start, uint64_t end, uint64_t start1, uint64_t end1) const - { - return ((start1 > start) && (start1 < end)) || ((start > start1) && (start < end1)); - } - inline bool IsAdjacent(uint64_t start, uint64_t end, uint64_t start1, uint64_t end1) const - { - return ((end == start1 || (end + 1) == start1)) || ((end1 == start) || (end1 + 1 == start)); - } - - inline 
bool operator==(const RangeSet &other) const - { - return range_set_ == other.range_set_; - } - - inline bool IsContain(uint32_t value) const - { - for (auto range : range_set_) { - if (value >= range.first && value <= range.second) { - return true; - } - } - return false; - } - inline uint32_t HighestValue() const - { - if (!range_set_.empty()) { - return range_set_.back().second; - } - return 0; - } - RangeSet(RangeSet const &) = default; - RangeSet &operator=(RangeSet const &) = default; - RangeSet(RangeSet &&) = default; - RangeSet &operator=(RangeSet &&) = default; - - void Insert(uint32_t start, uint32_t end); - void Insert(const RangeSet &s1); - void Invert(bool is_utf16); - void Compress(); - -private: - friend class RangeOpCode; - friend class Range32OpCode; - std::list> range_set_ {}; -}; - -class RangeOpCode : public RegExpOpCode { -public: - RangeOpCode() : RegExpOpCode(OP_RANGE, RegExpOpCode::OP_SIZE_ONE) {} - ~RangeOpCode() override = default; - NO_COPY_SEMANTIC(RangeOpCode); - NO_MOVE_SEMANTIC(RangeOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; - uint32_t InsertOpCode(DynChunk *buf, const RangeSet &range_set) const; -}; - -class MatchAheadOpCode : public RegExpOpCode { -public: - MatchAheadOpCode() : RegExpOpCode(OP_MATCH_AHEAD, RegExpOpCode::OP_SIZE_FIVE) {} - ~MatchAheadOpCode() override = default; - NO_COPY_SEMANTIC(MatchAheadOpCode); - NO_MOVE_SEMANTIC(MatchAheadOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; - uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const; -}; - -class NegativeMatchAheadOpCode : public RegExpOpCode { -public: - NegativeMatchAheadOpCode() : RegExpOpCode(OP_NEGATIVE_MATCH_AHEAD, RegExpOpCode::OP_SIZE_FIVE) {} - uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const; - ~NegativeMatchAheadOpCode() override = default; - NO_COPY_SEMANTIC(NegativeMatchAheadOpCode); - 
NO_MOVE_SEMANTIC(NegativeMatchAheadOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class PrevOpCode : public RegExpOpCode { -public: - PrevOpCode() : RegExpOpCode(OP_PREV, RegExpOpCode::OP_SIZE_ONE) {} - uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; - ~PrevOpCode() override = default; - NO_COPY_SEMANTIC(PrevOpCode); - NO_MOVE_SEMANTIC(PrevOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class BackReferenceOpCode : public RegExpOpCode { -public: - BackReferenceOpCode() : RegExpOpCode(OP_BACKREFERENCE, RegExpOpCode::OP_SIZE_TWO) {} - uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; - ~BackReferenceOpCode() override = default; - NO_COPY_SEMANTIC(BackReferenceOpCode); - NO_MOVE_SEMANTIC(BackReferenceOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class BackwardBackReferenceOpCode : public RegExpOpCode { -public: - BackwardBackReferenceOpCode() : RegExpOpCode(OP_BACKWARD_BACKREFERENCE, RegExpOpCode::OP_SIZE_TWO) {} - uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; - ~BackwardBackReferenceOpCode() override = default; - NO_COPY_SEMANTIC(BackwardBackReferenceOpCode); - NO_MOVE_SEMANTIC(BackwardBackReferenceOpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class Char32OpCode : public RegExpOpCode { -public: - Char32OpCode() : RegExpOpCode(OP_CHAR32, RegExpOpCode::OP_SIZE_FIVE) {} - uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; - ~Char32OpCode() override = default; - NO_COPY_SEMANTIC(Char32OpCode); - NO_MOVE_SEMANTIC(Char32OpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; -}; - -class Range32OpCode : public RegExpOpCode { -public: - Range32OpCode() : RegExpOpCode(OP_RANGE32, RegExpOpCode::OP_SIZE_ONE) {} - ~Range32OpCode() override = 
default; - NO_COPY_SEMANTIC(Range32OpCode); - NO_MOVE_SEMANTIC(Range32OpCode); - uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; - uint32_t InsertOpCode(DynChunk *buf, const RangeSet &range_set) const; -}; -} // namespace panda::ecmascript -#endif diff --git a/runtime/regexp/regexp_parser.cpp b/runtime/regexp/regexp_parser.cpp deleted file mode 100644 index 0a4cac3932363adece62d948a9539769195a29eb..0000000000000000000000000000000000000000 --- a/runtime/regexp/regexp_parser.cpp +++ /dev/null @@ -1,1459 +0,0 @@ -/* - * Copyright (c) 2021 Huawei Device Co., Ltd. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "plugins/ecmascript/runtime/regexp/regexp_parser.h" - -#include "plugins/ecmascript/runtime/base/string_helper.h" -#include "plugins/ecmascript/runtime/ecma_macros.h" -#include "plugins/ecmascript/runtime/regexp/regexp_opcode.h" -#include "libpandabase/utils/utils.h" -#include "securec.h" -#include "unicode/uchar.h" -#include "unicode/uniset.h" - -#define _NO_DEBUG_ - -namespace panda::ecmascript { -static constexpr uint32_t CACHE_SIZE = 128; -static constexpr uint32_t CHAR_MAXS = 128; -// NOLINTNEXTLINE(modernize-avoid-c-arrays) -static constexpr uint32_t ID_START_TABLE_ASCII[4] = { - /* $ A-Z _ a-z */ - 0x00000000, 0x00000010, 0x87FFFFFE, 0x07FFFFFE}; -static RangeSet G_RANGE_D(0x30, 0x39); // NOLINT(fuchsia-statically-constructed-objects, readability-magic-numbers) -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static RangeSet G_RANGE_S({ - std::pair(0x0009, 0x000D), // NOLINT(readability-magic-numbers) - std::pair(0x0020, 0x0020), // NOLINT(readability-magic-numbers) - std::pair(0x00A0, 0x00A0), // NOLINT(readability-magic-numbers) - std::pair(0x1680, 0x1680), // NOLINT(readability-magic-numbers) - std::pair(0x2000, 0x200A), // NOLINT(readability-magic-numbers) - /* 2028;LINE SEPARATOR;Zl;0;WS;;;;;N;;;;; */ - /* 2029;PARAGRAPH SEPARATOR;Zp;0;B;;;;;N;;;;; */ - std::pair(0x2028, 0x2029), // NOLINT(readability-magic-numbers) - std::pair(0x202F, 0x202F), // NOLINT(readability-magic-numbers) - std::pair(0x205F, 0x205F), // NOLINT(readability-magic-numbers) - std::pair(0x3000, 0x3000), // NOLINT(readability-magic-numbers) - /* FEFF;ZERO WIDTH NO-BREAK SPACE;Cf;0;BN;;;;;N;BYTE ORDER MARK;;;; */ - std::pair(0xFEFF, 0xFEFF), // NOLINT(readability-magic-numbers) -}); - -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static RangeSet G_RANGE_W({ - std::pair(0x0030, 0x0039), // NOLINT(readability-magic-numbers) - std::pair(0x0041, 0x005A), // NOLINT(readability-magic-numbers) - std::pair(0x005F, 0x005F), // 
NOLINT(readability-magic-numbers) - std::pair(0x0061, 0x007A), // NOLINT(readability-magic-numbers) -}); - -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static RangeSet G_REGEXP_IDENTIFY_START({ - std::pair(0x0024, 0x0024), // NOLINT(readability-magic-numbers) - std::pair(0x0041, 0x005A), // NOLINT(readability-magic-numbers) - std::pair(0x0061, 0x007A), // NOLINT(readability-magic-numbers) -}); - -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -static RangeSet G_REGEXP_IDENTIFY_CONTINUE({ - std::pair(0x0024, 0x0024), // NOLINT(readability-magic-numbers) - std::pair(0x0030, 0x0039), // NOLINT(readability-magic-numbers) - std::pair(0x0041, 0x005A), // NOLINT(readability-magic-numbers) - std::pair(0x0061, 0x007A), // NOLINT(readability-magic-numbers) -}); - -void RegExpParser::Parse() -{ - // dynbuffer head init [size,capture_count,statck_count,flags] - buffer_.EmitU32(0); - buffer_.EmitU32(0); - buffer_.EmitU32(0); - buffer_.EmitU32(0); - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("Parse Pattern------\n"); - // Pattern[U, N]:: - // Disjunction[?U, ?N] - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - Advance(); - SaveStartOpCode save_start_op; - int capture_index = capture_count_++; - save_start_op.EmitOpCode(&buffer_, capture_index); - ParseDisjunction(false); - if (c0_ != KEY_EOF) { - ParseError("extraneous characters at the end"); - return; - } - SaveEndOpCode save_end_op; - save_end_op.EmitOpCode(&buffer_, capture_index); - MatchEndOpCode match_end_op; - match_end_op.EmitOpCode(&buffer_, 0); - // dynbuffer head assignments - buffer_.PutU32(0, buffer_.size_); - buffer_.PutU32(NUM_CAPTURE__OFFSET, capture_count_); - buffer_.PutU32(NUM_STACK_OFFSET, stack_count_); - buffer_.PutU32(FLAGS_OFFSET, flags_); -#ifndef _NO_DEBUG_ - RegExpOpCode::DumpRegExpOpCode(std::cout, buffer_); -#endif -} - -void RegExpParser::ParseDisjunction(bool is_backward) -{ - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("Parse 
Disjunction------\n"); - size_t start = buffer_.size_; - ParseAlternative(is_backward); - if (is_error_) { - return; - } - do { - if (c0_ == '|') { - SplitNextOpCode split_op; - uint32_t len = buffer_.size_ - start; - GotoOpCode goto_op; - split_op.InsertOpCode(&buffer_, start, len + goto_op.GetSize()); - uint32_t pos = goto_op.EmitOpCode(&buffer_, 0) - goto_op.GetSize(); - Advance(); - ParseAlternative(is_backward); - goto_op.UpdateOpPara(&buffer_, pos, buffer_.size_ - pos - goto_op.GetSize()); - } - } while (c0_ != KEY_EOF && c0_ != ')'); -} - -uint32_t RegExpParser::ParseOctalLiteral() -{ - // For compatibility with some other browsers (not all), we parse - // up to three octal digits with a value below 256. - // ES#prod-annexB-LegacyOctalEscapeSequence - uint32_t value = c0_ - '0'; - Advance(); - if (c0_ >= '0' && c0_ <= '7') { - value = value * OCTAL_VALUE + c0_ - '0'; - Advance(); - if (value < OCTAL_VALUE_RANGE && c0_ >= '0' && c0_ <= '7') { - value = value * OCTAL_VALUE + c0_ - '0'; - Advance(); - } - } - return value; -} - -bool RegExpParser::ParseUnlimitedLengthHexNumber(uint32_t max_value, uint32_t *value) -{ - uint32_t x = 0; - int d = static_cast(HexValue(c0_)); - if (d < 0) { - return false; - } - while (d >= 0) { - if (UNLIKELY(x > (std::numeric_limits::max() - static_cast(d)) / HEX_VALUE)) { - LOG_ECMA(FATAL) << "value overflow"; - return false; - } - x = x * HEX_VALUE + static_cast(d); - if (x > max_value) { - return false; - } - Advance(); - d = static_cast(HexValue(c0_)); - } - *value = x; - return true; -} - -// This parses RegExpUnicodeEscapeSequence as described in ECMA262. -bool RegExpParser::ParseUnicodeEscape(uint32_t *value) -{ - // Accept both \uxxxx and \u{xxxxxx} (if allowed). - // In the latter case, the number of hex digits between { } is arbitrary. - // \ and u have already been read. 
- if (c0_ == '{' && IsUtf16()) { - uint8_t *start = pc_ - 1; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - Advance(); - if (ParseUnlimitedLengthHexNumber(0x10FFFF, value)) { // NOLINT(readability-magic-numbers) - if (c0_ == '}') { - Advance(); - return true; - } - } - pc_ = start; - Advance(); - return false; - } - // \u but no {, or \u{...} escapes not allowed. - bool result = ParseHexEscape(UNICODE_HEX_VALUE, value); - if (result && IsUtf16() && U16_IS_LEAD(*value) && c0_ == '\\') { - // Attempt to read trail surrogate. - uint8_t *start = pc_ - 1; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - if (*pc_ == 'u') { - Advance(UNICODE_HEX_ADVANCE); - uint32_t trail; - if (ParseHexEscape(UNICODE_HEX_VALUE, &trail) && U16_IS_TRAIL(trail)) { - *value = U16_GET_SUPPLEMENTARY((*value), (trail)); // NOLINT(hicpp-signed-bitwise) - return true; - } - } - pc_ = start; - Advance(); - } - return result; -} - -bool RegExpParser::ParseHexEscape(int length, uint32_t *value) -{ - uint8_t *start = pc_ - 1; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - uint32_t val = 0; - for (int i = 0; i < length; ++i) { - uint32_t c = c0_; - int d = static_cast(HexValue(c)); - if (d < 0) { - pc_ = start; - Advance(); - return false; - } - val = val * HEX_VALUE + static_cast(d); - Advance(); - } - *value = val; - return true; -} - -// NOLINTNEXTLINE(readability-function-size) -void RegExpParser::ParseAlternative(bool is_backward) -{ - size_t start = buffer_.size_; - while (c0_ != '|' && c0_ != KEY_EOF && c0_ != ')') { - if (is_error_) { - return; - } - size_t atom_bc_start = buffer_.GetSize(); - int capture_index = 0; - bool is_atom = false; - switch (c0_) { - case '^': { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("Assertion %c line start \n", c0_); - LineStartOpCode line_start_op; - line_start_op.EmitOpCode(&buffer_, 0); - Advance(); - break; - } - case '$': { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("Assertion 
%c line end \n", c0_); - LineEndOpCode line_end_op; - line_end_op.EmitOpCode(&buffer_, 0); - Advance(); - break; - } - case '\\': { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("Escape %c \n", c0_); - Advance(); - switch (c0_) { - case 'b': { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("Assertion %c \n", c0_); - WordBoundaryOpCode word_boundary_op; - word_boundary_op.EmitOpCode(&buffer_, 0); - Advance(); - break; - } - case 'B': { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("Assertion %c \n", c0_); - NotWordBoundaryOpCode not_word_boundary_op; - not_word_boundary_op.EmitOpCode(&buffer_, 0); - Advance(); - break; - } - default: { - is_atom = true; - int atom_value = ParseAtomEscape(is_backward); - if (atom_value != -1) { - if (IsIgnoreCase()) { - if (!IsUtf16()) { - atom_value = Canonicalize(atom_value, false); - } else { - icu::UnicodeSet set(atom_value, atom_value); - set.closeOver(USET_CASE_INSENSITIVE); - set.removeAllStrings(); - int32_t size = set.size(); - RangeOpCode range_op; - RangeSet range_result; - for (int32_t idx = 0; idx < size; idx++) { - int32_t uc = set.charAt(idx); - RangeSet cur_range(uc); - range_result.Insert(cur_range); - } - range_op.InsertOpCode(&buffer_, range_result); - break; - } - } - if (atom_value <= UINT16_MAX) { - CharOpCode char_op; - char_op.EmitOpCode(&buffer_, atom_value); - } else { - Char32OpCode char_op; - char_op.EmitOpCode(&buffer_, atom_value); - } - } - break; - } - } - break; - } - case '(': { - Advance(); - is_atom = ParseAssertionCapture(&capture_index, is_backward); - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - Advance(); - break; - } - case '.': { - PrevOpCode prev_op; - if (is_backward) { - prev_op.EmitOpCode(&buffer_, 0); - } - if (IsDotAll()) { - AllOpCode all_op; - all_op.EmitOpCode(&buffer_, 0); - } else { - DotsOpCode dots_op; - dots_op.EmitOpCode(&buffer_, 0); - } - if (is_backward) { - prev_op.EmitOpCode(&buffer_, 0); - } - // 
NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("Atom %c match any \n", c0_); - is_atom = true; - Advance(); - break; - } - case '[': { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("Atom %c match range \n", c0_); - is_atom = true; - PrevOpCode prev_op; - Advance(); - if (is_backward) { - prev_op.EmitOpCode(&buffer_, 0); - } - bool is_invert = false; - if (c0_ == '^') { - is_invert = true; - Advance(); - } - RangeSet range_result; - if (!ParseClassRanges(&range_result)) { - break; - } - if (is_invert) { - range_result.Invert(IsUtf16()); - } - uint32_t high_value = range_result.HighestValue(); - if (high_value <= UINT16_MAX) { - RangeOpCode range_op; - range_op.InsertOpCode(&buffer_, range_result); - } else { - Range32OpCode range_op; - range_op.InsertOpCode(&buffer_, range_result); - } - - if (is_backward) { - prev_op.EmitOpCode(&buffer_, 0); - } - break; - } - case '*': - case '+': - case '?': - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - ParseError("nothing to repeat"); - return; - case '{': { - uint8_t *begin = pc_ - 1; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - int dummy; - if (ParserIntervalQuantifier(&dummy, &dummy)) { - ParseError("nothing to repeat"); - return; - } - pc_ = begin; - Advance(); - } - [[fallthrough]]; - case '}': - case ']': - if (IsUtf16()) { - ParseError("syntax error"); - return; - } - [[fallthrough]]; - default: { - // PatternCharacter - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("PatternCharacter %c\n", c0_); - is_atom = true; - { - PrevOpCode prev_op; - if (is_backward) { - prev_op.EmitOpCode(&buffer_, 0); - } - uint32_t matched_char = c0_; - if (c0_ > (INT8_MAX + 1)) { - Prev(); - int i = 0; - UChar32 c; - int32_t length = end_ - pc_ + 1; - // NOLINTNEXTLINE(hicpp-signed-bitwise) - U8_NEXT(pc_, i, length, c); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - matched_char = static_cast(c); - pc_ += i; // 
NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - } - if (IsIgnoreCase()) { - matched_char = static_cast(Canonicalize(static_cast(matched_char), IsUtf16())); - } - if (matched_char > UINT16_MAX) { - Char32OpCode char_op; - char_op.EmitOpCode(&buffer_, matched_char); - } else { - CharOpCode char_op; - char_op.EmitOpCode(&buffer_, matched_char); - } - if (is_backward) { - prev_op.EmitOpCode(&buffer_, 0); - } - } - Advance(); - break; - } - } - if (is_atom && !is_error_) { - ParseQuantifier(atom_bc_start, capture_index, capture_count_ - 1); - } - if (is_backward) { - size_t end = buffer_.GetSize(); - size_t term_size = end - atom_bc_start; - size_t move_size = end - start; - buffer_.Expand(end + term_size); - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - if (memmove_s(buffer_.buf_ + start + term_size, move_size, buffer_.buf_ + start, move_size) != EOK) { - LOG_ECMA(FATAL) << "memmove_s failed"; - UNREACHABLE(); - } - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - if (memcpy_s(buffer_.buf_ + start, term_size, buffer_.buf_ + end, term_size) != EOK) { - LOG_ECMA(FATAL) << "memcpy_s failed"; - UNREACHABLE(); - } - } - } -} - -int RegExpParser::FindGroupName(const PandaString &name) -{ - size_t len = 0; - size_t name_len = name.size(); - const char *p = reinterpret_cast(group_names_.buf_); - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - const char *buf_end = reinterpret_cast(group_names_.buf_) + group_names_.size_; - int capture_index = 1; - while (p < buf_end) { - len = strlen(p); - if (len == name_len && memcmp(name.c_str(), p, name_len) == 0) { - return capture_index; - } - p += len + 1; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - capture_index++; - } - return -1; -} - -bool RegExpParser::ParseAssertionCapture(int *capture_index, bool is_backward) -{ - bool is_atom = false; - do { - if (c0_ == '?') { - Advance(); - switch (c0_) { - // (?=Disjunction[?U, ?N]) - case '=': { - // 
NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("Assertion(?= Disjunction)\n"); - Advance(); - uint32_t start = buffer_.size_; - ParseDisjunction(is_backward); - MatchOpCode match_op; - match_op.EmitOpCode(&buffer_, 0); - MatchAheadOpCode match_ahead_op; - uint32_t len = buffer_.size_ - start; - match_ahead_op.InsertOpCode(&buffer_, start, len); - break; - } - // (?!Disjunction[?U, ?N]) - case '!': { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("Assertion(?! Disjunction)\n"); - uint32_t start = buffer_.size_; - Advance(); - ParseDisjunction(is_backward); - MatchOpCode match_op; - match_op.EmitOpCode(&buffer_, 0); - NegativeMatchAheadOpCode match_ahead_op; - uint32_t len = buffer_.size_ - start; - match_ahead_op.InsertOpCode(&buffer_, start, len); - break; - } - case '<': { - Advance(); - // (?<=Disjunction[?U, ?N]) - if (c0_ == '=') { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("Assertion(?<= Disjunction)\n"); - Advance(); - uint32_t start = buffer_.size_; - ParseDisjunction(true); - MatchOpCode match_op; - match_op.EmitOpCode(&buffer_, 0); - MatchAheadOpCode match_ahead_op; - uint32_t len = buffer_.size_ - start; - match_ahead_op.InsertOpCode(&buffer_, start, len); - // (?(&pc_); - if (!ParseGroupSpecifier(pp, name)) { - ParseError("GroupName Syntax error."); - return false; - } - if (FindGroupName(name) > 0) { - ParseError("Duplicate GroupName error."); - return false; - } - group_names_.EmitStr(name.c_str()); - new_group_names_.push_back(name); - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("group name %s", name.c_str()); - Advance(); - goto parseCapture; // NOLINT(cppcoreguidelines-avoid-goto) - } - break; - } - // (?:Disjunction[?U, ?N]) - case ':': - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("Atom(?<: Disjunction)\n"); - is_atom = true; - Advance(); - ParseDisjunction(is_backward); - break; - default: - Advance(); - ParseError("? 
Syntax error."); - return false; - } - } else { - group_names_.EmitChar(0); - parseCapture: - is_atom = true; - *capture_index = capture_count_++; - SaveEndOpCode save_end_op; - SaveStartOpCode save_start_op; - if (is_backward) { - save_end_op.EmitOpCode(&buffer_, *capture_index); - } else { - save_start_op.EmitOpCode(&buffer_, *capture_index); - } - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("capture start %d \n", *capture_index); - ParseDisjunction(is_backward); - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("capture end %d \n", *capture_index); - if (is_backward) { - save_start_op.EmitOpCode(&buffer_, *capture_index); - } else { - save_end_op.EmitOpCode(&buffer_, *capture_index); - } - } - } while (c0_ != ')' && c0_ != KEY_EOF); - if (c0_ != ')') { - ParseError("capture syntax error"); - return false; - } - return is_atom; -} - -int RegExpParser::ParseDecimalDigits() -{ - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("Parse DecimalDigits------\n"); - uint32_t result = 0; - bool overflow = false; - while (true) { - if (c0_ < '0' || c0_ > '9') { - break; - } - if (!overflow) { - if (UNLIKELY(result > (INT32_MAX - c0_ + '0') / DECIMAL_DIGITS_ADVANCE)) { - overflow = true; - } else { - result = result * DECIMAL_DIGITS_ADVANCE + c0_ - '0'; - } - } - Advance(); - } - if (overflow) { - return INT32_MAX; - } - return result; -} - -bool RegExpParser::ParserIntervalQuantifier(int *pmin, int *pmax) -{ - // Quantifier:: - // QuantifierPrefix - // QuantifierPrefix? - // QuantifierPrefix:: - // * - // + - // ? 
- // {DecimalDigits} - // {DecimalDigits,} - // {DecimalDigits,DecimalDigits} - Advance(); - *pmin = ParseDecimalDigits(); - *pmax = *pmin; - switch (c0_) { - case ',': { - Advance(); - if (c0_ == '}') { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("QuantifierPrefix{DecimalDigits,}\n"); - *pmax = INT32_MAX; - Advance(); - } else { - *pmax = ParseDecimalDigits(); - if (c0_ == '}') { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("QuantifierPrefix{DecimalDigits,DecimalDigits}\n"); - Advance(); - } else { - return false; - } - } - break; - } - case '}': - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("QuantifierPrefix{DecimalDigits}\n"); - Advance(); - break; - default: - Advance(); - return false; - } - return true; -} - -void RegExpParser::ParseQuantifier(size_t atom_bc_start, int capture_start, int capture_end) -{ - int min = -1; - int max = -1; - bool is_greedy = true; - switch (c0_) { - case '*': - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("QuantifierPrefix %c\n", c0_); - min = 0; - max = INT32_MAX; - Advance(); - break; - case '+': - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("QuantifierPrefix %c\n", c0_); - min = 1; - max = INT32_MAX; - Advance(); - break; - case '?': - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("QuantifierPrefix %c\n", c0_); - Advance(); - min = 0; - max = 1; - break; - case '{': { - uint8_t *start = pc_ - 1; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - if (!ParserIntervalQuantifier(&min, &max)) { - pc_ = start; - Advance(); // back to '{' - return; - } - if (min > max) { - ParseError("Invalid repetition count"); - return; - } - break; - } - default: - break; - } - if (c0_ == '?') { - is_greedy = false; - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("Quantifier::QuantifierPrefix?\n"); - Advance(); - } else if (c0_ == '?' 
|| c0_ == '+' || c0_ == '*' || c0_ == '{') { - ParseError("nothing to repeat"); - return; - } - if (min != -1 && max != -1) { - stack_count_++; - PushOpCode push_op; - push_op.InsertOpCode(&buffer_, atom_bc_start); - atom_bc_start += push_op.GetSize(); - - if (capture_start != 0) { - SaveResetOpCode save_reset_op; - save_reset_op.InsertOpCode(&buffer_, atom_bc_start, capture_start, capture_end); - } - - // zero advance check - if (max == INT32_MAX) { - stack_count_++; - PushCharOpCode push_char_op; - push_char_op.InsertOpCode(&buffer_, atom_bc_start); - CheckCharOpCode check_char_op; - // NOLINTNEXTLINE(readability-magic-numbers) - check_char_op.EmitOpCode(&buffer_, RegExpOpCode::GetRegExpOpCode(RegExpOpCode::OP_LOOP)->GetSize()); - } - - if (is_greedy) { - LoopGreedyOpCode loop_op; - loop_op.EmitOpCode(&buffer_, atom_bc_start - buffer_.GetSize() - loop_op.GetSize(), min, max); - } else { - LoopOpCode loop_op; - loop_op.EmitOpCode(&buffer_, atom_bc_start - buffer_.GetSize() - loop_op.GetSize(), min, max); - } - - if (min == 0) { - if (is_greedy) { - SplitNextOpCode split_next_op; - split_next_op.InsertOpCode(&buffer_, atom_bc_start, buffer_.GetSize() - atom_bc_start); - } else { - SplitFirstOpCode split_first_op; - split_first_op.InsertOpCode(&buffer_, atom_bc_start, buffer_.GetSize() - atom_bc_start); - } - } - - PopOpCode pop_op; - pop_op.EmitOpCode(&buffer_); - } -} - -bool RegExpParser::ParseGroupSpecifier(const uint8_t **pp, PandaString &name) -{ - const uint8_t *p = *pp; - uint32_t c; - std::array buffer {}; - char *q = buffer.data(); - while (true) { - if (p <= end_) { - c = *p; - } else { - c = KEY_EOF; - } - if (c == '\\') { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - p++; - if (*p != 'u') { - return false; - } - if (!ParseUnicodeEscape(&c)) { - return false; - } - } else if (c == '>') { - break; - } else if (c > CACHE_SIZE && c != KEY_EOF) { - c = static_cast(base::StringHelper::UnicodeFromUtf8(p, UTF8_CHAR_LEN_MAX, &p)); - } 
else if (c != KEY_EOF) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - p++; - } else { - return false; - } - if (q == buffer.data()) { - if (IsIdentFirst(c) != 0) { - return false; - } - } else { - if (!u_isIDPart(c)) { - return false; - } - } - if (q != nullptr) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - *q++ = c; - } - } - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - p++; - *pp = p; - name = buffer.data(); - return true; -} - -int RegExpParser::ParseCaptureCount(const char *group_name) -{ - const uint8_t *p = nullptr; - int capture_index = 1; - PandaString name; - has_named_captures_ = 0; - for (p = base_; p < end_; p++) { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - switch (*p) { - case '(': { - if (p[1] == '?') { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - if (p[CAPTURE_CONUT_ADVANCE - 1] == '<' && p[CAPTURE_CONUT_ADVANCE] != '!' 
&& - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - p[CAPTURE_CONUT_ADVANCE] != '=') { - has_named_captures_ = 1; - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - p += CAPTURE_CONUT_ADVANCE; - if (group_name != nullptr) { - if (ParseGroupSpecifier(&p, name)) { - if (strcmp(name.c_str(), group_name) == 0) { - return capture_index; - } - } - } - capture_index++; - } - } else { - capture_index++; - } - break; - } - case '\\': - p++; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - break; - case '[': { - while (p < end_ && *p != ']') { - if (*p == '\\') { - p++; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - } - p++; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - } - break; - } - default: - break; - } - } - return capture_index; -} - -// NOLINTNEXTLINE(readability-function-size) -int RegExpParser::ParseAtomEscape(bool is_backward) -{ - // AtomEscape[U, N]:: - // DecimalEscape - // CharacterClassEscape[?U] - // CharacterEscape[?U] - // [+N]kGroupName[?U] - int result = -1; - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("Parse AtomEscape------\n"); - PrevOpCode prev_op; - switch (c0_) { - case KEY_EOF: - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - ParseError("unexpected end"); - break; - // DecimalEscape - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("NonZeroDigit %c\n", c0_); - int capture = ParseDecimalDigits(); - if (capture > capture_count_ - 1 && capture > ParseCaptureCount(nullptr) - 1) { - ParseError("invalid backreference count"); - break; - } - if (is_backward) { - BackwardBackReferenceOpCode back_reference_op; - back_reference_op.EmitOpCode(&buffer_, capture); - } else { - BackReferenceOpCode back_reference_op; - back_reference_op.EmitOpCode(&buffer_, capture); - } - break; - } - // CharacterClassEscape - case 'd': { - // 
[0-9] - RangeOpCode range_op; - if (is_backward) { - prev_op.EmitOpCode(&buffer_, 0); - } - range_op.InsertOpCode(&buffer_, G_RANGE_D); - goto parseLookBehind; // NOLINT(cppcoreguidelines-avoid-goto) - break; - } - case 'D': { - // [^0-9] - RangeSet atom_range(G_RANGE_D); - atom_range.Invert(IsUtf16()); - Range32OpCode range_op; - if (is_backward) { - prev_op.EmitOpCode(&buffer_, 0); - } - range_op.InsertOpCode(&buffer_, atom_range); - goto parseLookBehind; // NOLINT(cppcoreguidelines-avoid-goto) - break; - } - case 's': { - // [\f\n\r\t\v] - RangeOpCode range_op; - if (is_backward) { - prev_op.EmitOpCode(&buffer_, 0); - } - range_op.InsertOpCode(&buffer_, G_RANGE_S); - goto parseLookBehind; // NOLINT(cppcoreguidelines-avoid-goto) - break; - } - case 'S': { - RangeSet atom_range(G_RANGE_S); - Range32OpCode range_op; - atom_range.Invert(IsUtf16()); - if (is_backward) { - prev_op.EmitOpCode(&buffer_, 0); - } - range_op.InsertOpCode(&buffer_, atom_range); - goto parseLookBehind; // NOLINT(cppcoreguidelines-avoid-goto) - break; - } - case 'w': { - // [A-Za-z0-9] - RangeOpCode range_op; - if (is_backward) { - prev_op.EmitOpCode(&buffer_, 0); - } - range_op.InsertOpCode(&buffer_, G_RANGE_W); - goto parseLookBehind; // NOLINT(cppcoreguidelines-avoid-goto) - break; - } - case 'W': { - // [^A-Za-z0-9] - RangeSet atom_range(G_RANGE_W); - atom_range.Invert(IsUtf16()); - Range32OpCode range_op; - if (is_backward) { - prev_op.EmitOpCode(&buffer_, 0); - } - range_op.InsertOpCode(&buffer_, atom_range); - goto parseLookBehind; // NOLINT(cppcoreguidelines-avoid-goto) - break; - } - // P{UnicodePropertyValueExpression} - // p{UnicodePropertyValueExpression} - case 'P': - case 'p': - // [+N]kGroupName[?U] - case 'k': { - Advance(); - if (c0_ != '<') { - if (!IsUtf16() || HasNamedCaptures()) { - ParseError("expecting group name."); - break; - } - } - Advance(); - Prev(); - PandaString name; - auto **pp = const_cast(&pc_); - if (!ParseGroupSpecifier(pp, name)) { - ParseError("GroupName 
Syntax error."); - break; - } - int postion = FindGroupName(name); - if (postion < 0) { - postion = ParseCaptureCount(name.c_str()); - if (postion < 0 && (!IsUtf16() || HasNamedCaptures())) { - ParseError("group name not defined"); - break; - } - } - if (is_backward) { - BackwardBackReferenceOpCode back_reference_op; - back_reference_op.EmitOpCode(&buffer_, postion); - } else { - BackReferenceOpCode back_reference_op; - back_reference_op.EmitOpCode(&buffer_, postion); - } - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - Advance(); - break; - } - parseLookBehind : { - if (is_backward) { - prev_op.EmitOpCode(&buffer_, 0); - } - Advance(); - break; - } - default: - result = ParseCharacterEscape(); - break; - } - return result; -} - -int RegExpParser::RecountCaptures() -{ - if (total_capture_count_ < 0) { - const char *name = reinterpret_cast(group_names_.buf_); - total_capture_count_ = ParseCaptureCount(name); - } - return total_capture_count_; -} -bool RegExpParser::HasNamedCaptures() -{ - if (has_named_captures_ < 0) { - RecountCaptures(); - } - return false; -} - -int RegExpParser::ParseCharacterEscape() -{ - // CharacterEscape[U]:: - // ControlEscape - // c ControlLetter - // 0 [lookahead ∉ DecimalDigit] - // HexEscapeSequence - // RegExpUnicodeEscapeSequence[?U] - // IdentityEscape[?U] - uint32_t result = 0; - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - switch (c0_) { - // ControlEscape - case 'f': - result = '\f'; - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("ControlEscape %c\n", c0_); - Advance(); - break; - case 'n': - result = '\n'; - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("ControlEscape %c\n", c0_); - Advance(); - break; - case 'r': - result = '\r'; - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("ControlEscape %c\n", c0_); - Advance(); - break; - case 't': - result = '\t'; - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("ControlEscape %c\n", c0_); - Advance(); - 
break; - case 'v': - result = '\v'; - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("ControlEscape %c\n", c0_); - Advance(); - break; - // c ControlLetter - case 'c': { - Advance(); - if ((c0_ >= 'A' && c0_ <= 'Z') || (c0_ >= 'a' && c0_ <= 'z')) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("ControlLetter %c\n", c0_); - result = static_cast(c0_) & 0x1f; // NOLINT(readability-magic-numbers, hicpp-signed-bitwise) - Advance(); - } else { - if (!IsUtf16()) { - pc_--; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - result = '\\'; - } else { - ParseError("Invalid control letter"); - return -1; - } - } - break; - } - case '0': { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("CharacterEscape 0 [lookahead ∉ DecimalDigit]\n"); - if (IsUtf16() && !(*pc_ >= '0' && *pc_ <= '9')) { // NOLINT(readability-magic-numbers) - Advance(); - result = 0; - break; - } - [[fallthrough]]; - } - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': { - if (IsUtf16()) { - // With /u, decimal escape is not interpreted as octal character code. - ParseError("Invalid class escape"); - return 0; - } - result = ParseOctalLiteral(); - break; - } - // ParseHexEscapeSequence - // ParseRegExpUnicodeEscapeSequence - case 'x': { - Advance(); - if (ParseHexEscape(UNICODE_HEX_ADVANCE, &result)) { - return result; - } - if (IsUtf16()) { - ParseError("Invalid class escape"); - return -1; - } - result = 'x'; - break; - } - case 'u': { - Advance(); - if (ParseUnicodeEscape(&result)) { - return result; - } - if (IsUtf16()) { - // With /u, invalid escapes are not treated as identity escapes. - ParseError("Invalid unicode escape"); - return 0; - } - // If \u is not followed by a two-digit hexadecimal, treat it - // as an identity escape. 
- result = 'u'; - break; - } - // IdentityEscape[?U] - case '$': - case '(': - case ')': - case '*': - case '+': - case '.': - case '/': - case '?': - case '[': - case '\\': - case ']': - case '^': - case '{': - case '|': - case '}': - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("IdentityEscape %c\n", c0_); - result = c0_; - Advance(); - break; - default: { - if (IsUtf16()) { - ParseError("Invalid unicode escape"); - return 0; - } - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("SourceCharacter %c\n", c0_); - result = c0_; - if (result < CHAR_MAXS) { - Advance(); - } - break; - } - } - return result; -} - -bool RegExpParser::ParseClassRanges(RangeSet *result) -{ - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("Parse ClassRanges------\n"); - while (c0_ != ']') { - RangeSet s1; - uint32_t c1 = ParseClassAtom(&s1); - if (c1 == UINT32_MAX) { - ParseError("invalid class range"); - return false; - } - - int next_c0 = *pc_; - if (c0_ == '-' && next_c0 != ']') { - if (c1 == CLASS_RANGE_BASE) { - if (IsUtf16()) { - ParseError("invalid class range"); - return false; - } - result->Insert(s1); - continue; - } - Advance(); - RangeSet s2; - uint32_t c2 = ParseClassAtom(&s2); - if (c2 == UINT32_MAX) { - ParseError("invalid class range"); - return false; - } - if (c2 == CLASS_RANGE_BASE) { - if (IsUtf16()) { - ParseError("invalid class range"); - return false; - } - result->Insert(s2); - continue; - } - if (c1 < INT8_MAX) { - if (c1 > c2) { - ParseError("invalid class range"); - return false; - } - } - if (IsIgnoreCase()) { - c1 = static_cast(Canonicalize(c1, IsUtf16())); - c2 = static_cast(Canonicalize(c2, IsUtf16())); - } - - result->Insert(c1, c2); - } else { - result->Insert(s1); - } - } - Advance(); - return true; -} - -uint32_t RegExpParser::ParseClassAtom(RangeSet *atom) -{ - uint32_t ret = UINT32_MAX; - switch (c0_) { - case '\\': { - Advance(); - ret = static_cast(ParseClassEscape(atom)); - break; - } - case KEY_EOF: - 
break; - case 0: { - if (pc_ >= end_) { - return UINT32_MAX; - } - [[fallthrough]]; - } - default: { - uint32_t value = c0_; - size_t u16_size = 0; - if (c0_ > INT8_MAX) { - pc_ -= 1; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - auto u16_result = utf::ConvertUtf8ToUtf16Pair(pc_, true); - value = u16_result.first; - u16_size = u16_result.second; - Advance(u16_size + 1); - } else { - Advance(); - } - if (IsIgnoreCase()) { - value = static_cast(Canonicalize(value, IsUtf16())); - } - atom->Insert(RangeSet(value)); - ret = value; - break; - } - } - return ret; -} - -int RegExpParser::ParseClassEscape(RangeSet *atom) -{ - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("Parse ClassEscape------\n"); - int result = -1; - switch (c0_) { - case 'b': - Advance(); - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("ClassEscape %c", 'b'); - result = '\b'; - atom->Insert(RangeSet(static_cast('\b'))); - break; - case '-': - Advance(); - result = '-'; - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("ClassEscape %c", '-'); - atom->Insert(RangeSet(static_cast('-'))); - break; - // CharacterClassEscape - case 'd': - case 'D': - result = CLASS_RANGE_BASE; - atom->Insert(G_RANGE_D); - if (c0_ == 'D') { - atom->Invert(IsUtf16()); - } - Advance(); - break; - case 's': - case 'S': - result = CLASS_RANGE_BASE; - atom->Insert(G_RANGE_S); - if (c0_ == 'S') { - atom->Invert(IsUtf16()); - } - Advance(); - break; - case 'w': - case 'W': - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("ClassEscape::CharacterClassEscape %c\n", c0_); - result = CLASS_RANGE_BASE; - atom->Insert(G_RANGE_W); - if (c0_ == 'W') { - atom->Invert(IsUtf16()); - } - Advance(); - break; - // P{UnicodePropertyValueExpression} - // p{UnicodePropertyValueExpression} - case 'P': - case 'p': - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("Warning: \\p is not supported in ECMA 2015!"); - Advance(); - if (c0_ == '{') { - Advance(); - if 
(c0_ == '}') { - break; // p{}, invalid - } - bool is_value = false; - ParseUnicodePropertyValueCharacters(&is_value); - if (!is_value && c0_ == '=') { - // UnicodePropertyName = UnicodePropertyValue - Advance(); - if (c0_ == '}') { - break; // p{xxx=}, invalid - } - ParseUnicodePropertyValueCharacters(&is_value); - } - if (c0_ != '}') { - break; // p{xxx, invalid - } - // should do atom->Invert() here after ECMA 9.0 - Advance(); - result = CLASS_RANGE_BASE; - } - break; - default: - result = ParseCharacterEscape(); - int value = result; - if (IsIgnoreCase()) { - value = Canonicalize(value, IsUtf16()); - } - atom->Insert(RangeSet(static_cast(value))); - break; - } - return result; -} - -void RegExpParser::ParseUnicodePropertyValueCharacters(bool *is_value) -{ - if ((c0_ >= 'A' && c0_ <= 'Z') || (c0_ >= 'a' && c0_ <= 'z')) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("UnicodePropertyCharacter::ControlLetter %c\n", c0_); - } else if (c0_ == '_') { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("UnicodePropertyCharacter:: _ \n"); - } else if (c0_ >= '0' && c0_ <= '9') { - *is_value = true; - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("UnicodePropertyValueCharacter::DecimalDigit %c\n", c0_); - } else { - return; - } - Advance(); - ParseUnicodePropertyValueCharacters(is_value); -} - -// NOLINTNEXTLINE(cert-dcl50-cpp) -void RegExpParser::PrintF(const char *fmt, ...) 
-{ -#ifndef _NO_DEBUG_ - va_list args; - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,) - va_start(args, fmt); - vprintf(fmt, args); - va_end(args); -#else - (void)fmt; -#endif -} - -void RegExpParser::ParseError(const char *error_message) -{ - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("error: "); - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF(error_message); - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - PrintF("\n"); - SetIsError(); - size_t length = strlen(error_message) + 1; - if (memcpy_s(error_msg_, length, error_message, length) != EOK) { - LOG_ECMA(FATAL) << "memcpy_s failed"; - UNREACHABLE(); - } -} - -int RegExpParser::IsIdentFirst(uint32_t c) -{ - if (c < CACHE_SIZE) { - // NOLINTNEXTLINE(hicpp-signed-bitwise - return (ID_START_TABLE_ASCII[c >> 5] >> (c & 31)) & 1; // 5: Shift five bits 31: and operation binary of 31 - } - return static_cast(u_isIDStart(c)); -} -} // namespace panda::ecmascript \ No newline at end of file diff --git a/runtime/regexp/regexp_parser.h b/runtime/regexp/regexp_parser.h deleted file mode 100644 index 11c37646c9611678ee772453cae3ed123a5e7e4d..0000000000000000000000000000000000000000 --- a/runtime/regexp/regexp_parser.h +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright (c) 2021 Huawei Device Co., Ltd. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ECMASCRIPT_REGEXP_PARSER_H -#define ECMASCRIPT_REGEXP_PARSER_H - -#include -#include -#include -#include "plugins/ecmascript/runtime/mem/dyn_chunk.h" -#include "plugins/ecmascript/runtime/regexp/regexp_opcode.h" -#include "unicode/stringpiece.h" -#include "unicode/uchar.h" -#include "unicode/utf16.h" -#include "unicode/utf8.h" -#include "unicode/utypes.h" -#include "unicode/udata.h" - -namespace panda::ecmascript { -class RegExpParser { -public: - static constexpr auto FLAG_GLOBAL = (1U << 0U); - static constexpr auto FLAG_IGNORECASE = (1U << 1U); - static constexpr auto FLAG_MULTILINE = (1U << 2U); - static constexpr auto FLAG_DOTALL = (1U << 3U); - static constexpr auto FLAG_UTF16 = (1U << 4U); - static constexpr auto FLAG_STICKY = (1U << 5U); - static const uint32_t KEY_EOF = UINT32_MAX; - static constexpr int CLASS_RANGE_BASE = 0x40000000; - static constexpr uint32_t NUM_CAPTURE__OFFSET = 4; - static constexpr uint32_t NUM_STACK_OFFSET = 8; - static constexpr uint32_t OCTAL_VALUE = 8; - static constexpr uint32_t OCTAL_VALUE_RANGE = 32; - static constexpr uint32_t HEX_VALUE = 16; - static constexpr int32_t DECIMAL_DIGITS_ADVANCE = 10; - static constexpr uint32_t FLAGS_OFFSET = 12; - static constexpr uint32_t OP_START_OFFSET = 16; - static constexpr uint32_t UNICODE_HEX_VALUE = 4; - static constexpr uint32_t UNICODE_HEX_ADVANCE = 2; - static constexpr uint32_t CAPTURE_CONUT_ADVANCE = 3; - static constexpr uint32_t UTF8_CHAR_LEN_MAX = 6; - - explicit RegExpParser() = default; - - ~RegExpParser() - { - Clear(); - } - - NO_COPY_SEMANTIC(RegExpParser); - NO_MOVE_SEMANTIC(RegExpParser); - - inline void Init(char *source, size_t length, uint32_t flags) - { - pc_ = reinterpret_cast(source); - base_ = pc_; - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - end_ = reinterpret_cast(source) + length - 1; - flags_ = flags; - } - - void Parse(); - void ParseDisjunction(bool is_backward); - void ParseAlternative(bool is_backward); - bool 
ParseAssertionCapture(int *capture_index, bool is_backward); - void ParseQuantifier(size_t atom_bc_start, int capture_start, int capture_end); - int ParseDecimalDigits(); - int ParseAtomEscape(bool is_backward); - int ParseCharacterEscape(); - bool ParseGroupSpecifier(const uint8_t **pp, PandaString &name); - int ParseCaptureCount(const char *group_name); - bool ParseClassRanges(RangeSet *result); - void ParseNonemptyClassRangesNoDash(DynChunk *buffer); - uint32_t ParseClassAtom(RangeSet *atom); - int ParseClassEscape(RangeSet *atom); - void ParseError(const char *error_message); - void ParseUnicodePropertyValueCharacters(bool *is_value); - int FindGroupName(const PandaString &name); - uint32_t ParseOctalLiteral(); - bool ParseHexEscape(int length, uint32_t *value); - bool ParseUnlimitedLengthHexNumber(uint32_t max_value, uint32_t *value); - bool ParseUnicodeEscape(uint32_t *value); - bool ParserIntervalQuantifier(int *pmin, int *pmax); - bool HasNamedCaptures(); - int ParseEscape(const uint8_t **pp, int is_utf16); - int RecountCaptures(); - int IsIdentFirst(uint32_t c); - - inline PandaVector GetGroupNames() const - { - return new_group_names_; - } - - inline size_t GetGroupNamesSize() const - { - return group_names_.size_; - } - - inline bool IsError() const - { - return is_error_; - } - - inline uint8_t *GetOriginBuffer() const - { - return buffer_.buf_; - } - - inline size_t GetOriginBufferSize() const - { - return buffer_.size_; - } - - inline PandaString GetErrorMsg() const - { - if (is_error_) { - return PandaString(error_msg_); - } - return PandaString(""); - } - - inline bool IsGlobal() const - { - return (flags_ & FLAG_GLOBAL) != 0; - } - - inline bool IsIgnoreCase() const - { - return (flags_ & FLAG_IGNORECASE) != 0; - } - - inline bool IsMultiline() const - { - return (flags_ & FLAG_MULTILINE) != 0; - } - - inline bool IsDotAll() const - { - return (flags_ & FLAG_DOTALL) != 0; - } - - inline bool IsUtf16() const - { - return (flags_ & FLAG_UTF16) != 0; 
- } - - inline bool IsStick() const - { - return (flags_ & FLAG_STICKY) != 0; - } - - inline static int Canonicalize(int c, bool is_unicode) - { - if (c < TMP_BUF_SIZE) { // NOLINTNEXTLINE(readability-magic-numbers) - if (c >= 'a' && c <= 'z') { - c = c - 'a' + 'A'; - } - } else { - if (is_unicode) { - c = u_toupper(static_cast(c)); - } - } - return c; - } - -private: - friend class RegExpExecutor; - static constexpr int TMP_BUF_SIZE = 128; - void Clear() - { - base_ = nullptr; - pc_ = nullptr; - end_ = nullptr; - c0_ = KEY_EOF; - is_error_ = false; - } - - void Advance() - { - if (pc_ <= end_) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - c0_ = *pc_++; - } else { - c0_ = KEY_EOF; - } - } - - void Advance(int offset) - { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - pc_ += offset - 1; - Advance(); - } - - void Prev() - { - if (pc_ >= base_) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - c0_ = *pc_--; - } else { - c0_ = KEY_EOF; - } - } - - void SetIsError() - { - is_error_ = true; - } - - void PrintF(const char *fmt, ...); - uint8_t *base_ {nullptr}; - uint8_t *pc_ {nullptr}; - uint8_t *end_ {nullptr}; - uint32_t flags_ {0}; - uint32_t c0_ {KEY_EOF}; - int capture_count_ {0}; - int stack_count_ {0}; - bool is_error_ {false}; - char error_msg_[TMP_BUF_SIZE] = {0}; // NOLINT(modernize-avoid-c-arrays) - int has_named_captures_ = -1; - int total_capture_count_ = -1; - DynChunk buffer_ {}; - DynChunk group_names_ {}; - PandaVector new_group_names_ {}; -}; -} // namespace panda::ecmascript -#endif // ECMASCRIPT_REGEXP_PARSER_H diff --git a/subproject_sources.gn b/subproject_sources.gn index 969bf0a367d3d872a4213a3304b5ccfbab3073bb..820ff03229bee041083120e507582c91c83e4c2b 100644 --- a/subproject_sources.gn +++ b/subproject_sources.gn @@ -190,7 +190,6 @@ srcs_runtime = [ "runtime/linked_hash_table.cpp", "runtime/literal_data_extractor.cpp", "runtime/message_string.cpp", - 
"runtime/mem/dyn_chunk.cpp", "runtime/mem/ecma_reference_processor.cpp", "runtime/mem/ecma_string.cpp", "runtime/mem/mem_manager.cpp", @@ -200,8 +199,6 @@ srcs_runtime = [ "runtime/object_operator.cpp", "runtime/layout_info.cpp", "runtime/regexp/regexp_executor.cpp", - "runtime/regexp/regexp_opcode.cpp", - "runtime/regexp/regexp_parser.cpp", "runtime/regexp/regexp_parser_cache.cpp", "runtime/tagged_dictionary.cpp", "runtime/template_string.cpp", diff --git a/tests/runtime/CMakeLists.txt b/tests/runtime/CMakeLists.txt index 5bce158715b2f66eb86b030b987ee476c9d3dc0c..165db910280594f93e234ec726c3ee22fc92ea22 100644 --- a/tests/runtime/CMakeLists.txt +++ b/tests/runtime/CMakeLists.txt @@ -163,10 +163,8 @@ set(ECMASCRIPT_BUILTINS_DATAVIEW_TESTS_SOURCES ) set(ECMASCRIPT_REGEXP_TESTS_SOURCES - regexp/dyn_buffer_test.cpp regexp/regexp_test.cpp - common/test_helper.cpp -) + common/test_helper.cpp) set(ECMASCRIPT_BUILTINS_TYPEDARRAY_TESTS_SOURCES builtins/builtins_typedarray_test.cpp diff --git a/tests/runtime/regexp/dyn_buffer_test.cpp b/tests/runtime/regexp/dyn_buffer_test.cpp deleted file mode 100644 index 7bc98541d3b2e3134b0d841635e26f46d19ae47b..0000000000000000000000000000000000000000 --- a/tests/runtime/regexp/dyn_buffer_test.cpp +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2021-2022 Huawei Device Co., Ltd. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" -#include "test_helper.h" - -#include "plugins/ecmascript/runtime/ecma_vm.h" -#include "include/runtime.h" -#include "include/runtime_options.h" -#include "plugins/ecmascript/runtime/mem/dyn_chunk.h" -#include "plugins/ecmascript/runtime/object_factory.h" - -namespace panda::test { - -// NOLINTNEXTLINE(google-build-using-namespace) -using namespace panda::ecmascript; - -class DynBufferTest : public testing::Test { -public: - void SetUp() override - { - TestHelper::CreateEcmaVMWithScope(instance_, thread_, scope_); - } - - void TearDown() override - { - TestHelper::DestroyEcmaVMWithScope(instance_, scope_); - } - -private: - PandaVM *instance_ {}; - JSThread *thread_ {}; - EcmaHandleScope *scope_ {nullptr}; -}; - -TEST_F(DynBufferTest, EmitAndGet) -{ - DynChunk dyn_chunk = DynChunk(); - // NOLINTNEXTLINE(readability-magic-numbers) - dyn_chunk.EmitChar(65); - // NOLINTNEXTLINE(readability-magic-numbers) - dyn_chunk.EmitU16(66); - // NOLINTNEXTLINE(readability-magic-numbers) - dyn_chunk.EmitU32(67); - ASSERT_EQ(dyn_chunk.GetSize(), 7); - ASSERT_EQ(dyn_chunk.GetAllocatedSize(), DynChunk::ALLOCATE_MIN_SIZE); - ASSERT_EQ(dyn_chunk.GetError(), false); - dyn_chunk.Insert(1, 1); - uint32_t val1 = dyn_chunk.GetU8(0); - uint32_t val2 = dyn_chunk.GetU16(2); - uint32_t val3 = dyn_chunk.GetU32(4); - ASSERT_EQ(val1, 65); - ASSERT_EQ(val2, 66); - ASSERT_EQ(val3, 67); -} - -TEST_F(DynBufferTest, EmitSelfAndGet) -{ - DynChunk dyn_chunk = DynChunk(); - // NOLINTNEXTLINE(readability-magic-numbers) - dyn_chunk.EmitChar(65); - dyn_chunk.EmitSelf(0, 1); - ASSERT_EQ(dyn_chunk.GetSize(), 2); - ASSERT_EQ(dyn_chunk.GetAllocatedSize(), DynChunk::ALLOCATE_MIN_SIZE); - ASSERT_EQ(dyn_chunk.GetError(), false); - uint32_t val1 = dyn_chunk.GetU8(0); - uint32_t val2 = dyn_chunk.GetU8(1); - ASSERT_EQ(val1, 65); - ASSERT_EQ(val2, 65); -} - -TEST_F(DynBufferTest, EmitStrAndGet) -{ - DynChunk dyn_chunk = DynChunk(); - dyn_chunk.EmitStr("abc"); - 
ASSERT_EQ(dyn_chunk.GetSize(), 4); - ASSERT_EQ(dyn_chunk.GetAllocatedSize(), DynChunk::ALLOCATE_MIN_SIZE); - ASSERT_EQ(dyn_chunk.GetError(), false); - uint32_t val1 = dyn_chunk.GetU8(0); - uint32_t val2 = dyn_chunk.GetU8(1); - uint32_t val3 = dyn_chunk.GetU8(2); - uint32_t val4 = dyn_chunk.GetU8(3); - ASSERT_EQ(val1, 97); - ASSERT_EQ(val2, 98); - ASSERT_EQ(val3, 99); - ASSERT_EQ(val4, 0); -} -} // namespace panda::test diff --git a/tests/runtime/regexp/regexp_test.cpp b/tests/runtime/regexp/regexp_test.cpp index 86b5b914aa8ea97511ccd18b8a7d2a4a96bdcffc..f2f621a40adf7037365180e87a1ed434ec00c8ed 100644 --- a/tests/runtime/regexp/regexp_test.cpp +++ b/tests/runtime/regexp/regexp_test.cpp @@ -19,7 +19,7 @@ #include "plugins/ecmascript/runtime/ecma_vm.h" #include "include/runtime.h" #include "include/runtime_options.h" -#include "plugins/ecmascript/runtime/regexp/regexp_parser.h" +#include "runtime/regexp/ecmascript/regexp_parser.h" #include "plugins/ecmascript/runtime/regexp/regexp_executor.h" #include "plugins/ecmascript/runtime/object_factory.h" #include "plugins/ecmascript/runtime/ecma_string-inl.h" @@ -28,7 +28,8 @@ namespace panda::test { // NOLINTNEXTLINE(google-build-using-namespace) using namespace panda::ecmascript; -using MatchResult = RegExpExecutor::MatchResult; +using MatchResult = RegExpMatchResult>; +using RegExpExecutor = panda::ecmascript::RegExpExecutor; // NOLINTBEGIN(readability-magic-numbers) @@ -54,56 +55,6 @@ public: TestHelper::DestroyEcmaVMWithScope(instance_, scope_); } - bool IsValidAlphaEscapeInAtom(char s) const - { - switch (s) { - // Assertion [U] :: \b - case 'b': - // Assertion [U] :: \B - case 'B': - // ControlEscape :: one of f n r t v - case 'f': - case 'n': - case 'r': - case 't': - case 'v': - // CharacterClassEscape :: one of d D s S w W - case 'd': - case 'D': - case 's': - case 'S': - case 'w': - case 'W': - return true; - default: - return false; - } - } - - bool IsValidAlphaEscapeInClass(char s) const - { - switch (s) { - // 
ClassEscape[U] :: b - case 'b': - // ControlEscape :: one of f n r t v - case 'f': - case 'n': - case 'r': - case 't': - case 'v': - // CharacterClassEscape :: one of d D s S w W - case 'd': - case 'D': - case 's': - case 'S': - case 'w': - case 'W': - return true; - default: - return false; - } - } - protected: // NOLINTNEXTLINE(misc-non-private-member-variables-in-classes) JSThread *thread_ {nullptr}; @@ -113,526 +64,6 @@ private: EcmaHandleScope *scope_ {nullptr}; }; -TEST_F(RegExpTest, ParseError1) -{ - RegExpParser parser = RegExpParser(); - PandaString source("0{2,1}"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError2) -{ - RegExpParser parser = RegExpParser(); - PandaString source("^[z-a]$"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError3) -{ - RegExpParser parser = RegExpParser(); - PandaString source("\\"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError4) -{ - RegExpParser parser = RegExpParser(); - PandaString source("a**"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError5) -{ - RegExpParser parser = RegExpParser(); - PandaString source("a***"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError6) -{ - RegExpParser parser = RegExpParser(); - PandaString source("a**"); - 
parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError7) -{ - RegExpParser parser = RegExpParser(); - PandaString source("a++"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError8) -{ - RegExpParser parser = RegExpParser(); - PandaString source("a+++"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError9) -{ - RegExpParser parser = RegExpParser(); - PandaString source("a???"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError10) -{ - RegExpParser parser = RegExpParser(); - PandaString source("a????"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError11) -{ - RegExpParser parser = RegExpParser(); - PandaString source("*a"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError12) -{ - RegExpParser parser = RegExpParser(); - PandaString source("**a"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError13) -{ - RegExpParser parser = RegExpParser(); - PandaString source("+a"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), 
source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError14) -{ - RegExpParser parser = RegExpParser(); - PandaString source("++a"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError15) -{ - RegExpParser parser = RegExpParser(); - PandaString source("?a"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError16) -{ - RegExpParser parser = RegExpParser(); - PandaString source("??a"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError17) -{ - RegExpParser parser = RegExpParser(); - PandaString source("x{1}{1,}"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError18) -{ - RegExpParser parser = RegExpParser(); - PandaString source("x{1,2}{1}"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError19) -{ - RegExpParser parser = RegExpParser(); - PandaString source("x{1,}{1}"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError20) -{ - RegExpParser parser = RegExpParser(); - PandaString source("x{0,1}{1,}"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - 
bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError21) -{ - RegExpParser parser = RegExpParser(); - PandaString source("[b-ac-e]"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError22) -{ - RegExpParser parser = RegExpParser(); - PandaString source("[\\10b-G]"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError23) -{ - RegExpParser parser = RegExpParser(); - PandaString source("[\\0b-G]"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 0); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError24) -{ - RegExpParser parser = RegExpParser(); - PandaString source("("); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError25) -{ - RegExpParser parser = RegExpParser(); - PandaString source(")"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError26) -{ - RegExpParser parser = RegExpParser(); - PandaString source("{"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError27) -{ - RegExpParser parser = RegExpParser(); - PandaString source("}"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - 
ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError28) -{ - RegExpParser parser = RegExpParser(); - PandaString source("["); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError29) -{ - RegExpParser parser = RegExpParser(); - PandaString source("]"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError30) -{ - RegExpParser parser = RegExpParser(); - PandaString source("\\c"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError31) -{ - RegExpParser parser = RegExpParser(); - PandaString source("\\c\024"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError32) -{ - RegExpParser parser = RegExpParser(); - PandaString source("[\\c]"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError33) -{ - RegExpParser parser = RegExpParser(); - PandaString source("[\\c\024]"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError34) -{ - RegExpParser parser = RegExpParser(); - PandaString source("[\\d-a]"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - 
-TEST_F(RegExpTest, ParseError35) -{ - RegExpParser parser = RegExpParser(); - PandaString source("[\\s-a]"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError36) -{ - RegExpParser parser = RegExpParser(); - PandaString source("[\\s-\\w]"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError37) -{ - RegExpParser parser = RegExpParser(); - PandaString source("[a-\\w]"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError38) -{ - RegExpParser parser = RegExpParser(); - PandaString source("\\{"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_FALSE(parse_result); -} - -TEST_F(RegExpTest, ParseError39) -{ - RegExpParser parser = RegExpParser(); - PandaString source("\\/"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_FALSE(parse_result); -} - -TEST_F(RegExpTest, ParseError40) -{ - for (char cu = 0x41; cu <= 0x5a; ++cu) { - if (!IsValidAlphaEscapeInAtom(cu)) { - PandaString source("\\"); - source += PandaString(&cu, 1); - RegExpParser parser = RegExpParser(); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); - } - } - for (char cu = 0x61; cu <= 0x7a; ++cu) { - if (!IsValidAlphaEscapeInAtom(cu)) { - PandaString source("\\"); - source += PandaString(&cu, 1); - RegExpParser parser = RegExpParser(); - 
parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); - } - } - for (char cu = 0x41; cu <= 0x5a; ++cu) { - PandaString source("[\\"); - if (!IsValidAlphaEscapeInAtom(cu)) { - source += PandaString(&cu, 1); - source += PandaString("]"); - RegExpParser parser = RegExpParser(); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); - } - } - for (char cu = 0x61; cu <= 0x7a; ++cu) { - PandaString source("[\\"); - if (!IsValidAlphaEscapeInAtom(cu)) { - source += PandaString(&cu, 1); - source += PandaString("]"); - RegExpParser parser = RegExpParser(); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); - } - } -} - -TEST_F(RegExpTest, ParseError44) -{ - RegExpParser parser = RegExpParser(); - PandaString source("\\1"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError45) -{ - RegExpParser parser = RegExpParser(); - PandaString source("[\\1]"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError46) -{ - RegExpParser parser = RegExpParser(); - PandaString source("\\00"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseError47) -{ - RegExpParser parser = RegExpParser(); - PandaString source("[\\00]"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); 
- bool parse_result = parser.IsError(); - ASSERT_TRUE(parse_result); -} - -TEST_F(RegExpTest, ParseNoError1) -{ - RegExpParser parser = RegExpParser(); - PandaString source("a{10,2147483648}"); // 2^31 - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_FALSE(parse_result); -} - -TEST_F(RegExpTest, ParseNoError2) -{ - RegExpParser parser = RegExpParser(); - PandaString source("a{10,4294967306}"); // 2^32+10 - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 16); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_FALSE(parse_result); -} - -TEST_F(RegExpTest, ParseNoError3) -{ - RegExpParser parser = RegExpParser(); - PandaString source("[\\⥚]"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 1); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_FALSE(parse_result); -} - -TEST_F(RegExpTest, ParseNoError4) -{ - RegExpParser parser = RegExpParser(); - PandaString source("[\\⊲|\\⇐]"); - parser.Init(const_cast(reinterpret_cast(source.c_str())), source.size(), 1); - parser.Parse(); - bool parse_result = parser.IsError(); - ASSERT_FALSE(parse_result); -} - TEST_F(RegExpTest, ParseAndExec1) { ObjectFactory *factory = thread_->GetEcmaVM()->GetFactory(); @@ -1908,151 +1339,6 @@ TEST_F(RegExpTest, ParseAndExec59) ASSERT_TRUE(result.captures[0].second->Compare(*str) == 0); } -TEST_F(RegExpTest, RangeSet1) -{ - std::list> list_input = { - std::make_pair(1, 1), - std::make_pair(2, 2), - std::make_pair(3, 3), - }; - std::list> list_expected = { - std::make_pair(1, 5), - }; - RangeSet range_result(list_input); - RangeSet range_expected(list_expected); - range_result.Insert(4, 5); - range_result.Compress(); - EXPECT_EQ(range_result, range_expected); -} - -TEST_F(RegExpTest, RangeSet2) -{ - std::list> list_expected = { - std::make_pair(4, 5), - }; - RangeSet range_result; - RangeSet 
range_expected(list_expected); - range_result.Insert(4, 5); - range_result.Compress(); - EXPECT_EQ(range_result, range_expected); -} - -TEST_F(RegExpTest, RangeSet3) -{ - std::list> list_input = { - std::make_pair(2, 2), - }; - std::list> list_expected = { - std::make_pair(1, 5), - }; - RangeSet range_result(list_input); - RangeSet range_expected(list_expected); - range_result.Insert(1, 5); - range_result.Compress(); - EXPECT_EQ(range_result, range_expected); -} - -TEST_F(RegExpTest, RangeSet4) -{ - std::list> list_input = { - std::make_pair(1, 5), - }; - std::list> list_expected = { - std::make_pair(1, 5), - }; - RangeSet range_result(list_input); - RangeSet range_expected(list_expected); - range_result.Insert(2, 4); - range_result.Compress(); - EXPECT_EQ(range_result, range_expected); -} - -TEST_F(RegExpTest, RangeSet5) -{ - std::list> list_input = { - std::make_pair(1, 2), - std::make_pair(9, UINT16_MAX), - }; - std::list> list_expected = { - std::make_pair(1, 2), - std::make_pair(4, 7), - std::make_pair(9, UINT16_MAX), - }; - RangeSet range_result(list_input); - RangeSet range_expected(list_expected); - range_result.Insert(4, 7); - range_result.Compress(); - EXPECT_EQ(range_result, range_expected); -} - -TEST_F(RegExpTest, RangeSet6) -{ - std::list> list_expected = { - std::make_pair(0, UINT16_MAX), - }; - RangeSet range_result; - RangeSet range_expected(list_expected); - range_result.Invert(false); - EXPECT_EQ(range_result, range_expected); -} - -TEST_F(RegExpTest, RangeSet7) -{ - std::list> list_input = { - std::make_pair(1, 5), - }; - std::list> list_expected = { - std::make_pair(0, 0), - std::make_pair(6, UINT16_MAX), - }; - RangeSet range_result(list_input); - RangeSet range_expected(list_expected); - range_result.Invert(false); - EXPECT_EQ(range_result, range_expected); -} - -TEST_F(RegExpTest, RangeSet8) -{ - std::list> list_input = { - std::make_pair(1, 5), - std::make_pair(0xfffe, UINT16_MAX), - }; - std::list> list_expected = { - std::make_pair(0, 0), 
- std::make_pair(6, 0xfffd), - }; - RangeSet range_result(list_input); - RangeSet range_expected(list_expected); - range_result.Invert(false); - EXPECT_EQ(range_result, range_expected); -} - -TEST_F(RegExpTest, RangeSet9) -{ - std::list> list_input = { - std::make_pair(0, 5), - std::make_pair(0xfffe, 0xfffe), - }; - std::list> list_expected = { - std::make_pair(6, 0xfffd), - std::make_pair(UINT16_MAX, UINT16_MAX), - }; - RangeSet range_result(list_input); - RangeSet range_expected(list_expected); - range_result.Invert(false); - EXPECT_EQ(range_result, range_expected); -} - -TEST_F(RegExpTest, RangeSet10) -{ - std::list> list_input = { - std::make_pair(0, UINT16_MAX), - }; - RangeSet range_result(list_input); - RangeSet range_expected; - range_result.Invert(false); - EXPECT_EQ(range_result, range_expected); -} } // namespace panda::test // NOLINTEND(readability-magic-numbers)