From b2151fbc896945c068715ba8ccc62db29b640363 Mon Sep 17 00:00:00 2001 From: fcc Date: Tue, 10 Jun 2025 15:43:56 +0800 Subject: [PATCH] fix unicode whitespace in lexer Fix Unicode whitespace in lexer. Issue: https://gitee.com/openharmony/arkcompiler_ets_frontend/issues/ICE0R4 Signed-off-by: fcc --- ets2panda/lexer/lexer.cpp | 15 ++++++++++-- ets2panda/lexer/token/letters.h | 24 ++++++++++++------- .../test/runtime/ets/unicode_whitespace.ets | 17 +++++++++++++ 3 files changed, 46 insertions(+), 10 deletions(-) create mode 100644 ets2panda/test/runtime/ets/unicode_whitespace.ets diff --git a/ets2panda/lexer/lexer.cpp b/ets2panda/lexer/lexer.cpp index 3868b42336..5cbf89f97c 100644 --- a/ets2panda/lexer/lexer.cpp +++ b/ets2panda/lexer/lexer.cpp @@ -1340,17 +1340,27 @@ bool Lexer::SkipWhiteSpacesHelperDefault(const char32_t &cp) size_t cpSize {}; - switch (Iterator().PeekCp(&cpSize)) { + char32_t ch = Iterator().PeekCp(&cpSize); + switch (ch) { case LEX_CHAR_LS: case LEX_CHAR_PS: pos_.nextTokenLine_++; [[fallthrough]]; case LEX_CHAR_NBSP: case LEX_CHAR_ZWNBSP: + case LEX_CHAR_OGHAM: + case LEX_CHAR_NARROW_NO_BREAK_SP: + case LEX_CHAR_MATHEMATICAL_SP: + case LEX_CHAR_IDEOGRAPHIC_SP: Iterator().Forward(cpSize); return true; default: - return false; + if (ch >= LEX_CHAR_ENQUAD && ch <= LEX_CHAR_ZERO_WIDTH_SP) { + Iterator().Forward(cpSize); + return true; + } else { + return false; + } } } @@ -1376,6 +1386,7 @@ void Lexer::SkipWhiteSpaces() case LEX_CHAR_FF: case LEX_CHAR_SP: case LEX_CHAR_TAB: + case LEX_CHAR_NEXT_LINE: Iterator().Forward(1); continue; case LEX_CHAR_SLASH: diff --git a/ets2panda/lexer/token/letters.h b/ets2panda/lexer/token/letters.h index 222aa6ca68..e33d21b114 100644 --- a/ets2panda/lexer/token/letters.h +++ b/ets2panda/lexer/token/letters.h @@ -82,14 +82,22 @@ inline constexpr char32_t LEX_CHAR_UPPERCASE_X = 0x58; /* X */ inline constexpr char32_t LEX_CHAR_UPPERCASE_Y = 0x59; /* Y */ inline constexpr char32_t LEX_CHAR_UPPERCASE_Z = 0x5A; /* Y */ 
-inline constexpr char32_t LEX_CHAR_BS = 0x08; /* backspace */ -inline constexpr char32_t LEX_CHAR_TAB = 0x09; /* character tabulation */ -inline constexpr char32_t LEX_CHAR_VT = 0x0B; /* liner tabulation */ -inline constexpr char32_t LEX_CHAR_FF = 0x0C; /* form feed */ -inline constexpr char32_t LEX_CHAR_SP = 0x20; /* space */ -inline constexpr char32_t LEX_CHAR_NBSP = 0xA0; /* no-break space */ -inline constexpr char32_t LEX_CHAR_ZWNBSP = 0xFEFF; /* zero width no-break space */ -inline constexpr char32_t LEX_CHAR_MVS = 0x180e; /* MONGOLIAN VOWEL SEPARATOR (U+180E) */ +inline constexpr char32_t LEX_CHAR_BS = 0x08; /* backspace */ +inline constexpr char32_t LEX_CHAR_TAB = 0x09; /* character tabulation */ +inline constexpr char32_t LEX_CHAR_VT = 0x0B; /* line tabulation */ +inline constexpr char32_t LEX_CHAR_FF = 0x0C; /* form feed */ +inline constexpr char32_t LEX_CHAR_SP = 0x20; /* space */ +inline constexpr char32_t LEX_CHAR_NBSP = 0xA0; /* no-break space */ +inline constexpr char32_t LEX_CHAR_ZWNBSP = 0xFEFF; /* zero width no-break space */ +inline constexpr char32_t LEX_CHAR_MVS = 0x180e; /* MONGOLIAN VOWEL SEPARATOR (U+180E) */ +inline constexpr char32_t LEX_CHAR_NEXT_LINE = 0x85; /* next line */ +inline constexpr char32_t LEX_CHAR_OGHAM = 0x1680; /* ogham space mark */ +inline constexpr char32_t LEX_CHAR_ENQUAD = 0X2000; +inline constexpr char32_t LEX_CHAR_ZERO_WIDTH_SP = 0x200B; +inline constexpr char32_t LEX_CHAR_NARROW_NO_BREAK_SP = 0x202F; +inline constexpr char32_t LEX_CHAR_MATHEMATICAL_SP = 0x205F; +inline constexpr char32_t LEX_CHAR_IDEOGRAPHIC_SP = 0x3000; + inline constexpr char32_t LEX_CHAR_DOUBLE_QUOTE = 0x22; /* " */ inline constexpr char32_t LEX_CHAR_DOLLAR_SIGN = 0x24; /* $ */ inline constexpr char32_t LEX_CHAR_SINGLE_QUOTE = 0x27; /* ' */ diff --git a/ets2panda/test/runtime/ets/unicode_whitespace.ets b/ets2panda/test/runtime/ets/unicode_whitespace.ets new file mode 100644 index 0000000000..dc0042c7b9 --- /dev/null +++ 
b/ets2panda/test/runtime/ets/unicode_whitespace.ets @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2025 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// this function intentionally contains unicode whitespace +function main() { assertTrue(true) } -- Gitee