diff --git a/ets2panda/lexer/lexer.cpp b/ets2panda/lexer/lexer.cpp
index 3868b42336859c781eef8ca10aeeecfa7c0995c5..5cbf89f97cd258fbd86ceed908b4c0cfbfbaac99 100644
--- a/ets2panda/lexer/lexer.cpp
+++ b/ets2panda/lexer/lexer.cpp
@@ -1340,17 +1340,29 @@ bool Lexer::SkipWhiteSpacesHelperDefault(const char32_t &cp)
 
     size_t cpSize {};
 
-    switch (Iterator().PeekCp(&cpSize)) {
+    const char32_t ch = Iterator().PeekCp(&cpSize);
+    switch (ch) {
         case LEX_CHAR_LS:
         case LEX_CHAR_PS:
             pos_.nextTokenLine_++;
             [[fallthrough]];
+        // NEL (U+0085) is not ASCII: consume it by its decoded size like NBSP, never as a single byte.
+        case LEX_CHAR_NEXT_LINE:
         case LEX_CHAR_NBSP:
         case LEX_CHAR_ZWNBSP:
+        case LEX_CHAR_OGHAM:
+        case LEX_CHAR_NARROW_NO_BREAK_SP:
+        case LEX_CHAR_MATHEMATICAL_SP:
+        case LEX_CHAR_IDEOGRAPHIC_SP:
             Iterator().Forward(cpSize);
             return true;
         default:
+            // Contiguous run U+2000 (en quad) .. U+200B (zero width space), inclusive.
+            if (ch >= LEX_CHAR_ENQUAD && ch <= LEX_CHAR_ZERO_WIDTH_SP) {
+                Iterator().Forward(cpSize);
+                return true;
+            }
             return false;
     }
 }
 
diff --git a/ets2panda/lexer/token/letters.h b/ets2panda/lexer/token/letters.h
index 222aa6ca685ce90f41a2ae483bb32ad50464857e..e33d21b1142c67a38b63d43a501964425c39f4b7 100644
--- a/ets2panda/lexer/token/letters.h
+++ b/ets2panda/lexer/token/letters.h
@@ -82,14 +82,22 @@ inline constexpr char32_t LEX_CHAR_UPPERCASE_X = 0x58; /* X */
 inline constexpr char32_t LEX_CHAR_UPPERCASE_Y = 0x59; /* Y */
 inline constexpr char32_t LEX_CHAR_UPPERCASE_Z = 0x5A; /* Y */
 
-inline constexpr char32_t LEX_CHAR_BS     = 0x08;   /* backspace */
-inline constexpr char32_t LEX_CHAR_TAB    = 0x09;   /* character tabulation */
-inline constexpr char32_t LEX_CHAR_VT     = 0x0B;   /* liner tabulation */
-inline constexpr char32_t LEX_CHAR_FF     = 0x0C;   /* form feed */
-inline constexpr char32_t LEX_CHAR_SP     = 0x20;   /* space */
-inline constexpr char32_t LEX_CHAR_NBSP   = 0xA0;   /* no-break space */
-inline constexpr char32_t LEX_CHAR_ZWNBSP = 0xFEFF; /* zero width no-break space */
-inline constexpr char32_t LEX_CHAR_MVS    = 0x180e; /* MONGOLIAN VOWEL SEPARATOR (U+180E) */
+inline constexpr char32_t LEX_CHAR_BS                 = 0x08;   /* backspace */
+inline constexpr char32_t LEX_CHAR_TAB                = 0x09;   /* character tabulation */
+inline constexpr char32_t LEX_CHAR_VT                 = 0x0B;   /* line tabulation */
+inline constexpr char32_t LEX_CHAR_FF                 = 0x0C;   /* form feed */
+inline constexpr char32_t LEX_CHAR_SP                 = 0x20;   /* space */
+inline constexpr char32_t LEX_CHAR_NBSP               = 0xA0;   /* no-break space */
+inline constexpr char32_t LEX_CHAR_ZWNBSP             = 0xFEFF; /* zero width no-break space */
+inline constexpr char32_t LEX_CHAR_MVS                = 0x180e; /* MONGOLIAN VOWEL SEPARATOR (U+180E) */
+inline constexpr char32_t LEX_CHAR_NEXT_LINE          = 0x85;   /* next line, NEL (U+0085) */
+inline constexpr char32_t LEX_CHAR_OGHAM              = 0x1680; /* ogham space mark (U+1680) */
+inline constexpr char32_t LEX_CHAR_ENQUAD             = 0x2000; /* en quad (U+2000), first of the U+2000..U+200B run */
+inline constexpr char32_t LEX_CHAR_ZERO_WIDTH_SP      = 0x200B; /* zero width space (U+200B), last of that run */
+inline constexpr char32_t LEX_CHAR_NARROW_NO_BREAK_SP = 0x202F; /* narrow no-break space (U+202F) */
+inline constexpr char32_t LEX_CHAR_MATHEMATICAL_SP    = 0x205F; /* medium mathematical space (U+205F) */
+inline constexpr char32_t LEX_CHAR_IDEOGRAPHIC_SP     = 0x3000; /* ideographic space (U+3000) */
+
 inline constexpr char32_t LEX_CHAR_DOUBLE_QUOTE = 0x22; /* " */
 inline constexpr char32_t LEX_CHAR_DOLLAR_SIGN = 0x24; /* $ */
 inline constexpr char32_t LEX_CHAR_SINGLE_QUOTE = 0x27; /* ' */
diff --git a/ets2panda/test/runtime/ets/unicode_whitespace.ets b/ets2panda/test/runtime/ets/unicode_whitespace.ets
new file mode 100644
index 0000000000000000000000000000000000000000..dc0042c7b9648143f62849d8511fff2981ae03ab
--- /dev/null
+++ b/ets2panda/test/runtime/ets/unicode_whitespace.ets
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2025 Huawei Device Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// this function intentionally contains unicode whitespace
+function main() { assertTrue(true) }