From ead08bcd4933b3bf1b9d4b59cb98d54d731935db Mon Sep 17 00:00:00 2001 From: gavin1012_hw Date: Thu, 23 Feb 2023 10:23:11 +0800 Subject: [PATCH] Fix unicode implementation in es2panda Issue: I6H5LY Signed-off-by: gavin1012_hw Change-Id: I77716dcdbcedbc59b73b5e2a7043c57740b872f6 --- es2panda/lexer/keywordsUtil.cpp | 11 +- es2panda/lexer/lexer.cpp | 20 +- es2panda/lexer/regexp/regexp.cpp | 21 +- es2panda/lexer/token/letters.h | 2 +- ...incomplete-hex-unicode-escape-expected.txt | 191 ++++++++++++++++++ .../js/test-incomplete-hex-unicode-escape.js | 17 ++ .../js/test-invalid-regexp-flags-expected.txt | 1 + .../parser/js/test-invalid-regexp-flags.js | 16 ++ .../test-invalid-unicode-escape-expected.txt | 1 + .../parser/js/test-invalid-unicode-escape.js | 16 ++ ...tal-escape-in-template-string-expected.txt | Bin 0 -> 1899 bytes .../test-octal-escape-in-template-string.js | 16 ++ .../test/parser/js/test-regexp-p-expected.txt | 1 + ...processing-in-template-string-expected.txt | 3 + es2panda/test/test_tsc_ignore_list.txt | 12 -- test262/es2021_tests.txt | 101 ++++++++- test262/es2abc_skip_tests.json | 25 +++ test262/ts2abc_skip_tests.json | 19 ++ 18 files changed, 436 insertions(+), 37 deletions(-) create mode 100644 es2panda/test/parser/js/test-incomplete-hex-unicode-escape-expected.txt create mode 100644 es2panda/test/parser/js/test-incomplete-hex-unicode-escape.js create mode 100644 es2panda/test/parser/js/test-invalid-regexp-flags-expected.txt create mode 100644 es2panda/test/parser/js/test-invalid-regexp-flags.js create mode 100644 es2panda/test/parser/js/test-invalid-unicode-escape-expected.txt create mode 100644 es2panda/test/parser/js/test-invalid-unicode-escape.js create mode 100644 es2panda/test/parser/js/test-octal-escape-in-template-string-expected.txt create mode 100644 es2panda/test/parser/js/test-octal-escape-in-template-string.js diff --git a/es2panda/lexer/keywordsUtil.cpp b/es2panda/lexer/keywordsUtil.cpp index f939f10c57..eb8230ee4d 100644 --- a/es2panda/lexer/keywordsUtil.cpp +++ b/es2panda/lexer/keywordsUtil.cpp @@ -177,8 +177,8 @@ bool KeywordsUtil::IsIdentifierStart(char32_t cp) if (cp < LEX_ASCII_MAX_BITS) { return (ASCII_FLAGS[cp] & AsciiFlags::ID_START) != 0; } - - auto uchar = static_cast(cp); + // Unicode {xxxxx} may consist of 4 bytes information and cannot be forcibly converted to 2 bytes + auto uchar = static_cast(cp); return u_hasBinaryProperty(uchar, UCHAR_ID_START); } @@ -188,8 +188,11 @@ bool KeywordsUtil::IsIdentifierPart(char32_t cp) return (ASCII_FLAGS[cp] & AsciiFlags::ID_CONTINUE) != 0; } - // u_isIDPart or Other_ID_Continue characters or ZWJ/ZWNJ. - auto uchar = static_cast(cp); + /** + * u_isIDPart or Other_ID_Continue characters or ZWJ/ZWNJ. + * Unicode {xxxxx} may consist of 4 bytes information and cannot be forcibly converted to 2 bytes + */ + auto uchar = static_cast(cp); return (u_hasBinaryProperty(uchar, UCHAR_ID_CONTINUE) || cp == LEX_CHAR_ZWNJ || cp == LEX_CHAR_ZWJ); } diff --git a/es2panda/lexer/lexer.cpp b/es2panda/lexer/lexer.cpp index 9a587def59..dd34c9c4fc 100644 --- a/es2panda/lexer/lexer.cpp +++ b/es2panda/lexer/lexer.cpp @@ -479,16 +479,6 @@ LexerTemplateString Lexer::ScanTemplateString() Iterator().Forward(1); char32_t nextCp = Iterator().Peek(); - if (IsOctalDigit(nextCp)) { - Iterator().Forward(1); - - if (Iterator().Peek() != LEX_CHAR_BACK_TICK) { - ThrowError("Octal escape sequences are not allowed in template strings"); - } - - Iterator().Backward(1); - } - if (nextCp == LEX_CHAR_BACK_TICK || nextCp == LEX_CHAR_BACKSLASH || nextCp == LEX_CHAR_DOLLAR_SIGN) { templateStr.str.Append(cp); templateStr.str.Append(nextCp); @@ -596,6 +586,7 @@ void Lexer::ScanStringUnicodePart(util::UString *str) case LEX_CHAR_0: { Iterator().Forward(1); bool isDecimal = IsDecimalDigit(Iterator().Peek()); + bool isOctal = IsOctalDigit(Iterator().Peek()); Iterator().Backward(1); if (!isDecimal) { @@ -603,6 +594,10 @@ void Lexer::ScanStringUnicodePart(util::UString *str) break; } + if (isOctal) { + ThrowError("Octal escape sequences are not allowed in strict mode"); + } + [[fallthrough]]; } default: { @@ -1101,9 +1096,12 @@ RegExpFlags Lexer::ScanRegExpFlags() flag = RegExpFlags::STICKY; break; } - default: { + case LEX_CHAR_SP: { return resultFlags; } + default: { + ThrowError("Invalid RegExp flag"); + } } if (flag == RegExpFlags::EMPTY || (resultFlags & flag) != 0) { diff --git a/es2panda/lexer/regexp/regexp.cpp b/es2panda/lexer/regexp/regexp.cpp index ea4356ce4d..f2885fc5aa 100644 --- a/es2panda/lexer/regexp/regexp.cpp +++ b/es2panda/lexer/regexp/regexp.cpp @@ -456,16 +456,20 @@ void RegExpParser::ParseAtomEscape() switch (cp) { case LEX_CHAR_LOWERCASE_X: { - ParseHexEscape(); + if (Unicode()) { + ParseHexEscape(); + } break; } case LEX_CHAR_LOWERCASE_U: { - ParseUnicodeEscape(); + if (Unicode()) { + ParseUnicodeEscape(); + } break; } case LEX_CHAR_LOWERCASE_K: { ParseNamedBackreference(); - return; + break; } /* ControlEscape */ case LEX_CHAR_LOWERCASE_F: @@ -480,12 +484,12 @@ void RegExpParser::ParseAtomEscape() case LEX_CHAR_UPPERCASE_S: case LEX_CHAR_LOWERCASE_W: case LEX_CHAR_UPPERCASE_W: { - return; + break; } case LEX_CHAR_LOWERCASE_P: case LEX_CHAR_UPPERCASE_P: { ParseUnicodePropertyEscape(); - return; + break; } case LEX_CHAR_LOWERCASE_C: { cp = Peek(); @@ -495,7 +499,7 @@ void RegExpParser::ParseAtomEscape() } Next(); - return; + break; } default: { /* IdentityEscape */ @@ -583,6 +587,7 @@ uint32_t RegExpParser::ParseLegacyOctalEscape() uint32_t RegExpParser::ParseHexEscape() { + // two hexadecimal digits after x in the regular expression char32_t digit = Next(); if (!IsHexDigit(digit)) { ThrowError("Invalid hex escape"); @@ -814,13 +819,13 @@ bool RegExpParser::ParsePatternCharacter() static bool IsIdStart(uint32_t cp) { - auto uchar = static_cast(cp); + auto uchar = static_cast(cp); return u_isIDStart(uchar) != 0 || cp == LEX_CHAR_DOLLAR_SIGN || cp == LEX_CHAR_UNDERSCORE; } static bool IsIdCont(uint32_t cp) { - auto uchar = static_cast(cp); + auto uchar = static_cast(cp); return u_isIDPart(uchar) != 0 || cp == LEX_CHAR_DOLLAR_SIGN || cp == LEX_CHAR_ZWNJ || cp == LEX_CHAR_ZWJ; } diff --git a/es2panda/lexer/token/letters.h b/es2panda/lexer/token/letters.h index cb8c574b5c..fe2f1168cc 100644 --- a/es2panda/lexer/token/letters.h +++ b/es2panda/lexer/token/letters.h @@ -80,7 +80,7 @@ namespace panda::es2panda::lexer { #define LEX_CHAR_UPPERCASE_W 0X57 /* W */ #define LEX_CHAR_UPPERCASE_X 0x58 /* X */ #define LEX_CHAR_UPPERCASE_Y 0x59 /* Y */ -#define LEX_CHAR_UPPERCASE_Z 0x5A /* X */ +#define LEX_CHAR_UPPERCASE_Z 0x5A /* Z */ #define LEX_CHAR_BS 0x08 /* backspace */ #define LEX_CHAR_TAB 0x09 /* character tabulation */ diff --git a/es2panda/test/parser/js/test-incomplete-hex-unicode-escape-expected.txt b/es2panda/test/parser/js/test-incomplete-hex-unicode-escape-expected.txt new file mode 100644 index 0000000000..3ff447428e --- /dev/null +++ b/es2panda/test/parser/js/test-incomplete-hex-unicode-escape-expected.txt @@ -0,0 +1,191 @@ +{ + "type": "Program", + "statements": [ + { + "type": "ExpressionStatement", + "expression": { + "type": "CallExpression", + "callee": { + "type": "MemberExpression", + "object": { + "type": "RegExpLiteral", + "source": "\xa", + "flags": "", + "loc": { + "start": { + "line": 16, + "column": 2 + }, + "end": { + "line": 16, + "column": 6 + } + } + }, + "property": { + "type": "Identifier", + "name": "test", + "decorators": [], + "loc": { + "start": { + "line": 16, + "column": 7 + }, + "end": { + "line": 16, + "column": 11 + } + } + }, + "computed": false, + "optional": false, + "loc": { + "start": { + "line": 16, + "column": 2 + }, + "end": { + "line": 16, + "column": 11 + } + } + }, + "arguments": [ + { + "type": "StringLiteral", + "value": "xa", + "loc": { + "start": { + "line": 16, + "column": 12 + }, + "end": { + "line": 16, + "column": 16 + } + } + } + ], + "optional": false, + "loc": { + "start": { + "line": 16, + "column": 2 + }, + "end": { + "line": 16, + "column": 17 + } + } + }, + "loc": { + "start": { + "line": 16, + "column": 1 + }, + "end": { + "line": 16, + "column": 18 + } + } + }, + { + "type": "ExpressionStatement", + "expression": { + "type": "CallExpression", + "callee": { + "type": "MemberExpression", + "object": { + "type": "RegExpLiteral", + "source": "\x", + "flags": "", + "loc": { + "start": { + "line": 17, + "column": 2 + }, + "end": { + "line": 17, + "column": 5 + } + } + }, + "property": { + "type": "Identifier", + "name": "test", + "decorators": [], + "loc": { + "start": { + "line": 17, + "column": 6 + }, + "end": { + "line": 17, + "column": 10 + } + } + }, + "computed": false, + "optional": false, + "loc": { + "start": { + "line": 17, + "column": 2 + }, + "end": { + "line": 17, + "column": 10 + } + } + }, + "arguments": [ + { + "type": "StringLiteral", + "value": "x", + "loc": { + "start": { + "line": 17, + "column": 11 + }, + "end": { + "line": 17, + "column": 14 + } + } + } + ], + "optional": false, + "loc": { + "start": { + "line": 17, + "column": 2 + }, + "end": { + "line": 17, + "column": 15 + } + } + }, + "loc": { + "start": { + "line": 17, + "column": 1 + }, + "end": { + "line": 17, + "column": 16 + } + } + } + ], + "loc": { + "start": { + "line": 1, + "column": 1 + }, + "end": { + "line": 18, + "column": 1 + } + } +} diff --git a/es2panda/test/parser/js/test-incomplete-hex-unicode-escape.js b/es2panda/test/parser/js/test-incomplete-hex-unicode-escape.js new file mode 100644 index 0000000000..b1d3a38905 --- /dev/null +++ b/es2panda/test/parser/js/test-incomplete-hex-unicode-escape.js @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2023 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/\xa/.test("xa"); +/\x/.test("x"); diff --git a/es2panda/test/parser/js/test-invalid-regexp-flags-expected.txt b/es2panda/test/parser/js/test-invalid-regexp-flags-expected.txt new file mode 100644 index 0000000000..3d874a22bb --- /dev/null +++ b/es2panda/test/parser/js/test-invalid-regexp-flags-expected.txt @@ -0,0 +1 @@ +SyntaxError: Invalid RegExp flag [test-invalid-regexp-flags.js:15:612] diff --git a/es2panda/test/parser/js/test-invalid-regexp-flags.js b/es2panda/test/parser/js/test-invalid-regexp-flags.js new file mode 100644 index 0000000000..b2a5342280 --- /dev/null +++ b/es2panda/test/parser/js/test-invalid-regexp-flags.js @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2023 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/adad/p; diff --git a/es2panda/test/parser/js/test-invalid-unicode-escape-expected.txt b/es2panda/test/parser/js/test-invalid-unicode-escape-expected.txt new file mode 100644 index 0000000000..1796c96866 --- /dev/null +++ b/es2panda/test/parser/js/test-invalid-unicode-escape-expected.txt @@ -0,0 +1 @@ +SyntaxError: Invalid hex escape [test-invalid-unicode-escape.js:16:10] diff --git a/es2panda/test/parser/js/test-invalid-unicode-escape.js b/es2panda/test/parser/js/test-invalid-unicode-escape.js new file mode 100644 index 0000000000..be33039c3b --- /dev/null +++ b/es2panda/test/parser/js/test-invalid-unicode-escape.js @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2023 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +var c = /\x1/u; diff --git a/es2panda/test/parser/js/test-octal-escape-in-template-string-expected.txt b/es2panda/test/parser/js/test-octal-escape-in-template-string-expected.txt new file mode 100644 index 0000000000000000000000000000000000000000..f916e97225c3da2f9f1533bab904725132684300 GIT binary patch literal 1899 zcmcgsy-ve05N7UEM90d|22lhv3`h(Nh=nqk)*!1+T%5ENReg8P7n1mF2a1r$V97q; z_j7mgZj@zEf2<^2W^iZCs}%*DxWlRzwJc;=R~UN4lPufO>Lok~tHiUGS8}d}6}2)Y zCA-V8=ns1cABJwV+#!^)9TMGB3zX3ca3`LOn+3{LFG}(pyitmRoNXCcBhH{;V@cYR zbKHNR%{*emBrJ)o69EiqRXU={lp{Gzm>az=N{pT{mMq(cmqC`XO~N<`e;VSm{odO# zW>BSyZFN&|FN>ANS-4fTv_glKl-re+RfVljmhx>aD#iQX6q@&vOt!PoGG1#Mz<`mi zZiHUDg2yOWa4X(11hZ|P&&If(CJ?AEW+`300gjHrt4<@3)u=fd7z|Q0JRB7Mrhydi zhGa61lq4pFa)zemcZ7M8FcRU9k0apwfBJ(i0$EXy{`d#wlaPSzeb0uo3+m8<|cTPK)Y|_7(R`CJg?Bljo8A#)ezT d2-6#B?eN=2Ih6P%FOb%yoe$