From 7312c012bed0d38f681eb89e24db57575e102a19 Mon Sep 17 00:00:00 2001 From: ElevenDuan Date: Wed, 24 Jan 2024 09:24:37 +0800 Subject: [PATCH] parser unsafe file failed Signed-off-by: ElevenDuan Change-Id: I16d208ba85a29ec7dabce63dcbdcdbcbaaa6d1b6 --- .../parser/js/test_ansi_string-expected.txt | 1 + es2panda/test/parser/js/test_ansi_string.js | 16 +++++++++++ es2panda/util/ustring.h | 28 +++++++++++++++++++ 3 files changed, 45 insertions(+) create mode 100644 es2panda/test/parser/js/test_ansi_string-expected.txt create mode 100644 es2panda/test/parser/js/test_ansi_string.js diff --git a/es2panda/test/parser/js/test_ansi_string-expected.txt b/es2panda/test/parser/js/test_ansi_string-expected.txt new file mode 100644 index 0000000000..e5feefc5fe --- /dev/null +++ b/es2panda/test/parser/js/test_ansi_string-expected.txt @@ -0,0 +1 @@ +SyntaxError: Unterminated RegExp [test_unsafe_file.js:16:7] diff --git a/es2panda/test/parser/js/test_ansi_string.js b/es2panda/test/parser/js/test_ansi_string.js new file mode 100644 index 0000000000..6ee51d2328 --- /dev/null +++ b/es2panda/test/parser/js/test_ansi_string.js @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2024 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/ ��?\������? ??\+ ??\/ ??\/ ??\ diff --git a/es2panda/util/ustring.h b/es2panda/util/ustring.h index 52976ee8ac..94c36ac06a 100644 --- a/es2panda/util/ustring.h +++ b/es2panda/util/ustring.h @@ -200,6 +200,18 @@ public: return iter_ != sv_.end(); } + bool HasExpectedNumberOfBytes(size_t count) const + { + for (size_t i = 0; i < count; ++i) { + if (!HasNext()) { + return false; + } + iter_++; + } + iter_ -= count; + return true; + } + void SkipCp() const; private: @@ -235,6 +247,10 @@ private: static constexpr uint16_t UTF8_CONT_MASK = 0x3F; static constexpr uint16_t UTF8_CONT_HEADER = 0x80; + static constexpr size_t UTF8_NEXT_ONE_BYTE = 1; + static constexpr size_t UTF8_NEXT_TWO_BYTE = 2; + static constexpr size_t UTF8_NEXT_THREE_BYTE = 3; + static constexpr char32_t SURROGATE_HIGH_MIN = 0xD800; static constexpr char32_t SURROGATE_HIGH_MAX = 0xDC00; static constexpr char32_t SURROGATE_LOW_MIN = 0xDC00; @@ -322,15 +338,27 @@ char32_t StringView::Iterator::DecodeCP([[maybe_unused]] size_t *cpSize) const if (cu0 < Constants::UTF8_1BYTE_LIMIT) { res = cu0; } else if ((cu0 & Constants::UTF8_3BYTE_HEADER) == Constants::UTF8_2BYTE_HEADER) { + // Should be 2 bytes decoded in UTF-8, check if there is one byte following. + if (!HasExpectedNumberOfBytes(Constants::UTF8_NEXT_ONE_BYTE)) { + return INVALID_CP; + } char32_t cu1 = static_cast(*iterNext++); res = ((cu0 & Constants::UTF8_2BYTE_MASK) << Constants::UTF8_2BYTE_SHIFT) | (cu1 & Constants::UTF8_CONT_MASK); } else if ((cu0 & Constants::UTF8_4BYTE_HEADER) == Constants::UTF8_3BYTE_HEADER) { + // Should be 3 bytes decoded in UTF-8, check if there are 2 bytes following. + if (!HasExpectedNumberOfBytes(Constants::UTF8_NEXT_TWO_BYTE)) { + return INVALID_CP; + } char32_t cu1 = static_cast(*iterNext++); char32_t cu2 = static_cast(*iterNext++); res = ((cu0 & Constants::UTF8_3BYTE_MASK) << Constants::UTF8_3BYTE_SHIFT) | ((cu1 & Constants::UTF8_CONT_MASK) << Constants::UTF8_2BYTE_SHIFT) | (cu2 & Constants::UTF8_CONT_MASK); } else if (((cu0 & Constants::UTF8_DECODE_4BYTE_MASK) == Constants::UTF8_4BYTE_HEADER) && (cu0 <= Constants::UTF8_DECODE_4BYTE_LIMIT)) { + // Should be 4 bytes decoded in UTF-8, check if there are 3 bytes following. + if (!HasExpectedNumberOfBytes(Constants::UTF8_NEXT_THREE_BYTE)) { + return INVALID_CP; + } char32_t cu1 = static_cast(*iterNext++); char32_t cu2 = static_cast(*iterNext++); char32_t cu3 = static_cast(*iterNext++); -- Gitee