diff --git a/es2panda/test/parser/js/test_ansi_string-expected.txt b/es2panda/test/parser/js/test_ansi_string-expected.txt new file mode 100644 index 0000000000000000000000000000000000000000..e5feefc5fe84096908752547fdfdbcb0a0fc0eb1 --- /dev/null +++ b/es2panda/test/parser/js/test_ansi_string-expected.txt @@ -0,0 +1 @@ +SyntaxError: Unterminated RegExp [test_unsafe_file.js:16:7] diff --git a/es2panda/test/parser/js/test_ansi_string.js b/es2panda/test/parser/js/test_ansi_string.js new file mode 100644 index 0000000000000000000000000000000000000000..6ee51d23286af3bd30bbb6fd82aa5633d9779d83 --- /dev/null +++ b/es2panda/test/parser/js/test_ansi_string.js @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2024 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/ ��?\������? 
??\+ ??\/ ??\/ ??\ diff --git a/es2panda/util/ustring.h b/es2panda/util/ustring.h index 52976ee8acabd2c235cb2bafb73229d6c880a2ed..94c36ac06a5f0e8e54e75cb4967d0e8b6eac054e 100644 --- a/es2panda/util/ustring.h +++ b/es2panda/util/ustring.h @@ -200,6 +200,18 @@ public: return iter_ != sv_.end(); } + /* Probes whether iter_ can be stepped forward `count` times with HasNext() true before each step. Returns true and rewinds iter_ by `count` when every step succeeds. Returns false as soon as HasNext() fails; on that path iter_ is NOT rewound and stays where the probe stopped. NOTE(review): the probe starts at iter_ itself, while the DecodeCP call sites describe `count` as the number of bytes FOLLOWING the lead byte - confirm whether iter_ still points at the lead byte there, otherwise the check is one byte short. */ bool HasExpectedNumberOfBytes(size_t count) const + { + for (size_t i = 0; i < count; ++i) { + if (!HasNext()) { + return false; + } + iter_++; + } + iter_ -= count; + return true; + } + void SkipCp() const; private: @@ -235,6 +247,10 @@ private: static constexpr uint16_t UTF8_CONT_MASK = 0x3F; static constexpr uint16_t UTF8_CONT_HEADER = 0x80; + /* Byte counts handed to HasExpectedNumberOfBytes() by the 2-, 3- and 4-byte branches of DecodeCP respectively. */ static constexpr size_t UTF8_NEXT_ONE_BYTE = 1; + static constexpr size_t UTF8_NEXT_TWO_BYTE = 2; + static constexpr size_t UTF8_NEXT_THREE_BYTE = 3; + static constexpr char32_t SURROGATE_HIGH_MIN = 0xD800; static constexpr char32_t SURROGATE_HIGH_MAX = 0xDC00; static constexpr char32_t SURROGATE_LOW_MIN = 0xDC00; @@ -322,15 +338,27 @@ char32_t StringView::Iterator::DecodeCP([[maybe_unused]] size_t *cpSize) const if (cu0 < Constants::UTF8_1BYTE_LIMIT) { res = cu0; } else if ((cu0 & Constants::UTF8_3BYTE_HEADER) == Constants::UTF8_2BYTE_HEADER) { + // Should be 2 bytes decoded in UTF-8, check if there is one byte following. + if (!HasExpectedNumberOfBytes(Constants::UTF8_NEXT_ONE_BYTE)) { + return INVALID_CP; + } char32_t cu1 = static_cast(*iterNext++); res = ((cu0 & Constants::UTF8_2BYTE_MASK) << Constants::UTF8_2BYTE_SHIFT) | (cu1 & Constants::UTF8_CONT_MASK); } else if ((cu0 & Constants::UTF8_4BYTE_HEADER) == Constants::UTF8_3BYTE_HEADER) { + // Should be 3 bytes decoded in UTF-8, check if there are 2 bytes following. 
+ if (!HasExpectedNumberOfBytes(Constants::UTF8_NEXT_TWO_BYTE)) { + return INVALID_CP; + } char32_t cu1 = static_cast(*iterNext++); char32_t cu2 = static_cast(*iterNext++); res = ((cu0 & Constants::UTF8_3BYTE_MASK) << Constants::UTF8_3BYTE_SHIFT) | ((cu1 & Constants::UTF8_CONT_MASK) << Constants::UTF8_2BYTE_SHIFT) | (cu2 & Constants::UTF8_CONT_MASK); } else if (((cu0 & Constants::UTF8_DECODE_4BYTE_MASK) == Constants::UTF8_4BYTE_HEADER) && (cu0 <= Constants::UTF8_DECODE_4BYTE_LIMIT)) { + // Should be 4 bytes decoded in UTF-8, check if there are 3 bytes following. + if (!HasExpectedNumberOfBytes(Constants::UTF8_NEXT_THREE_BYTE)) { + return INVALID_CP; + } char32_t cu1 = static_cast(*iterNext++); char32_t cu2 = static_cast(*iterNext++); char32_t cu3 = static_cast(*iterNext++);