From 3a0bdbfba8fb77d76feffee42ea8ea1e92655f7e Mon Sep 17 00:00:00 2001
From: bojiang
Date: Tue, 23 Aug 2022 19:17:46 +0800
Subject: [PATCH 1/6] jiangbo91@huawei.com
Signed-off-by: bojiang
---
.eslintrc.js | 20 +++
.prettierignore | 2 +
.prettierrc | 3 +
.travis.yml | 4 +
README.OpenSource | 11 --
README.en.md | 25 ----
README.md | 52 +++++---
bundle.json | 30 -----
docs/version-history.md | 10 ++
lerna.json | 2 +-
.../parse5-html-rewriting-stream/package.json | 6 +-
.../lib/index.js | 4 +
.../package.json | 4 +-
packages/parse5-parser-stream/package.json | 4 +-
.../package.json | 6 +-
packages/parse5-sax-parser/docs/index.md | 4 +-
packages/parse5-sax-parser/package.json | 4 +-
.../parse5-serializer-stream/package.json | 4 +-
.../docs/source-code-location/end-location.md | 48 +++++++
.../parse5/docs/tree-adapter/interface.md | 21 ++-
.../extensions/location-info/parser-mixin.js | 23 ++--
packages/parse5/lib/parser/index.js | 122 ++++--------------
packages/parse5/lib/tree-adapters/default.js | 4 +
packages/parse5/package.json | 2 +-
.../parse5/test/location-info-parser.test.js | 55 +++++++-
25 files changed, 254 insertions(+), 216 deletions(-)
create mode 100644 .eslintrc.js
create mode 100644 .prettierignore
create mode 100644 .prettierrc
create mode 100644 .travis.yml
delete mode 100644 README.OpenSource
delete mode 100644 README.en.md
delete mode 100644 bundle.json
create mode 100644 packages/parse5/docs/source-code-location/end-location.md
diff --git a/.eslintrc.js b/.eslintrc.js
new file mode 100644
index 0000000..c3774cd
--- /dev/null
+++ b/.eslintrc.js
@@ -0,0 +1,20 @@
+module.exports = { // Root ESLint configuration for the repository
+ env: {
+ es6: true,
+ node: true
+ },
+ extends: ['eslint:recommended', 'prettier'],
+ plugins: ['prettier'],
+ rules: {
+ 'prettier/prettier': 'error', // formatting deviations are reported as lint errors
+ 'no-console': 'error',
+ curly: ['error', 'all'],
+ 'prefer-arrow-callback': 'error',
+ 'one-var': ['error', 'never'],
+ 'no-var': 'error',
+ 'prefer-const': 'error'
+ },
+ parserOptions: {
+ ecmaVersion: 6 // sources are parsed as ES2015
+ }
+};
diff --git a/.prettierignore b/.prettierignore
new file mode 100644
index 0000000..f93620d
--- /dev/null
+++ b/.prettierignore
@@ -0,0 +1,2 @@
+packages/parse5/lib/tokenizer/named-entity-data.js
+docs
diff --git a/.prettierrc b/.prettierrc
new file mode 100644
index 0000000..ca3f154
--- /dev/null
+++ b/.prettierrc
@@ -0,0 +1,3 @@
+printWidth: 120
+tabWidth: 4
+singleQuote: true
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..68bfbf7
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,4 @@
+language: node_js
+sudo: false
+node_js:
+ - stable
diff --git a/README.OpenSource b/README.OpenSource
deleted file mode 100644
index 7c930ff..0000000
--- a/README.OpenSource
+++ /dev/null
@@ -1,11 +0,0 @@
-[
- {
- "Name": "parse5",
- "License": "MIT",
- "License File": "NOTICE",
- "Version Number": " 5.1.1",
- "Owner": "sunbingxin@huawei.com",
- "Upstream URL": "https://github.com/inikulin/parse5.git",
- "Description": "HTML parser and serializer."
- }
-]
diff --git a/README.en.md b/README.en.md
deleted file mode 100644
index ff43d00..0000000
--- a/README.en.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# third_party_parse5
-
-#### Description
-HTML parser and serializer.
-
-#### License
-Copyright (c) 2013-2019 Ivan Nikulin (ifaaan@gmail.com, https://github.com/inikulin)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
index 77f07f3..c91e9ab 100644
--- a/README.md
+++ b/README.md
@@ -1,25 +1,37 @@
-# third_party_parse5
+
+
+
+
+
-#### 介绍
-HTML parser and serializer.
+
+HTML parsing/serialization toolset for Node.js. WHATWG HTML Living Standard (aka HTML5)-compliant.
+
-#### License
-Copyright (c) 2013-2019 Ivan Nikulin (ifaaan@gmail.com, https://github.com/inikulin)
+
+
+
+
+
+
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
+
+parse5 provides nearly everything you may need when dealing with HTML. It's the fastest spec-compliant HTML parser
+for Node to date. It parses HTML the way the latest version of your browser does. It has proven itself reliable in such projects
+as jsdom, Angular2, Polymer and many more.
+
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
+---
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
\ No newline at end of file
+
+ List of parse5 toolset packages
+
+
+
+ Online playground
+
+
+
+ Version history
+
+
diff --git a/bundle.json b/bundle.json
deleted file mode 100644
index 4d74a14..0000000
--- a/bundle.json
+++ /dev/null
@@ -1,30 +0,0 @@
-{
- "name": "@ohos/parse5",
- "description": "HTML parser and serializer.",
- "version": "3.1",
- "license": "Apache V2",
- "publishAs": "code-segment",
- "segment": {
- "destPath": "third_party/parse5"
- },
- "dirs": {},
- "scripts": {},
- "component": {
- "name": "thirdparty_parse5",
- "subsystem": "developtools",
- "syscap": [],
- "features": [],
- "adapted_system_type": [],
- "rom": "",
- "ram": "",
- "deps": {
- "components": [],
- "third_party": []
- },
- "build": {
- "sub_component": [],
- "inner_kits": [],
- "test": []
- }
- }
-}
\ No newline at end of file
diff --git a/docs/version-history.md b/docs/version-history.md
index 8d60a00..43ddae4 100644
--- a/docs/version-history.md
+++ b/docs/version-history.md
@@ -1,5 +1,15 @@
# Version history
+# 6.0.1
+* Fixed: Handling of self-closing `
` tags (by [@43081j](https://github.com/43081j)).
+* Fixed: Broken link in TreeAdapter document (GH [#317](https://github.com/inikulin/parse5/issues/317)) (by [@ursm](https://github.com/ursm)).
+* Fixed: SAXParser example (GH [#316](https://github.com/inikulin/parse5/issues/316)) (by [@mvasilkov](https://github.com/mvasilkov)).
+
+# 6.0.0
+* Added (**breaking**): Tree adapter interface now has `updateNodeSourceCodeLocation` method which
+enables usage of custom location info formats (GH [#314](https://github.com/inikulin/parse5/issues/314)) (by [@DMartens](https://github.com/DMartens)).
+
+
# 5.1.1
* Fixed: Serialization of attributes in non-standard namespaces (by [@Zirro](https://github.com/Zirro)).
* Fixed: Quirks and limited-quirks mode detection by doctype (by [@squidfunk](https://github.com/squidfunk)).
diff --git a/lerna.json b/lerna.json
index fa42581..6cf0b0f 100644
--- a/lerna.json
+++ b/lerna.json
@@ -1,5 +1,5 @@
{
"lerna": "2.10.2",
"packages": ["packages/*"],
- "version": "5.1.1"
+ "version": "6.0.1"
}
diff --git a/packages/parse5-html-rewriting-stream/package.json b/packages/parse5-html-rewriting-stream/package.json
index b95de57..e4bf660 100644
--- a/packages/parse5-html-rewriting-stream/package.json
+++ b/packages/parse5-html-rewriting-stream/package.json
@@ -1,7 +1,7 @@
{
"name": "parse5-html-rewriting-stream",
"description": "Streaming HTML rewriter.",
- "version": "5.1.1",
+ "version": "6.0.1",
"author": "Ivan Nikulin (https://github.com/inikulin)",
"contributors": "https://github.com/inikulin/parse5/graphs/contributors",
"homepage": "https://github.com/inikulin/parse5",
@@ -17,8 +17,8 @@
"license": "MIT",
"main": "./lib/index.js",
"dependencies": {
- "parse5": "^5.1.1",
- "parse5-sax-parser": "^5.1.1"
+ "parse5": "^6.0.1",
+ "parse5-sax-parser": "^6.0.1"
},
"repository": {
"type": "git",
diff --git a/packages/parse5-htmlparser2-tree-adapter/lib/index.js b/packages/parse5-htmlparser2-tree-adapter/lib/index.js
index 8240473..58464d7 100644
--- a/packages/parse5-htmlparser2-tree-adapter/lib/index.js
+++ b/packages/parse5-htmlparser2-tree-adapter/lib/index.js
@@ -342,3 +342,7 @@ exports.setNodeSourceCodeLocation = function(node, location) {
exports.getNodeSourceCodeLocation = function(node) {
return node.sourceCodeLocation;
};
+
+exports.updateNodeSourceCodeLocation = function(node, endLocation) {
+ node.sourceCodeLocation = Object.assign(node.sourceCodeLocation, endLocation); // copies endLocation's fields onto the node's existing location object
+};
diff --git a/packages/parse5-htmlparser2-tree-adapter/package.json b/packages/parse5-htmlparser2-tree-adapter/package.json
index 4c71c1b..cb8d818 100644
--- a/packages/parse5-htmlparser2-tree-adapter/package.json
+++ b/packages/parse5-htmlparser2-tree-adapter/package.json
@@ -1,7 +1,7 @@
{
"name": "parse5-htmlparser2-tree-adapter",
"description": "htmlparser2 tree adapter for parse5.",
- "version": "5.1.1",
+ "version": "6.0.1",
"author": "Ivan Nikulin (https://github.com/inikulin)",
"contributors": "https://github.com/inikulin/parse5/graphs/contributors",
"homepage": "https://github.com/inikulin/parse5",
@@ -14,7 +14,7 @@
"license": "MIT",
"main": "./lib/index.js",
"dependencies": {
- "parse5": "^5.1.1"
+ "parse5": "^6.0.1"
},
"repository": {
"type": "git",
diff --git a/packages/parse5-parser-stream/package.json b/packages/parse5-parser-stream/package.json
index 284dd04..068e075 100644
--- a/packages/parse5-parser-stream/package.json
+++ b/packages/parse5-parser-stream/package.json
@@ -1,7 +1,7 @@
{
"name": "parse5-parser-stream",
"description": "Streaming HTML parser with scripting support.",
- "version": "5.1.1",
+ "version": "6.0.1",
"author": "Ivan Nikulin (https://github.com/inikulin)",
"contributors": "https://github.com/inikulin/parse5/graphs/contributors",
"homepage": "https://github.com/inikulin/parse5",
@@ -14,7 +14,7 @@
"license": "MIT",
"main": "./lib/index.js",
"dependencies": {
- "parse5": "^5.1.1"
+ "parse5": "^6.0.1"
},
"repository": {
"type": "git",
diff --git a/packages/parse5-plain-text-conversion-stream/package.json b/packages/parse5-plain-text-conversion-stream/package.json
index 7db65e7..974a0d1 100644
--- a/packages/parse5-plain-text-conversion-stream/package.json
+++ b/packages/parse5-plain-text-conversion-stream/package.json
@@ -1,7 +1,7 @@
{
"name": "parse5-plain-text-conversion-stream",
"description": "Stream that converts plain text files into HTML document.",
- "version": "5.1.1",
+ "version": "6.0.1",
"author": "Ivan Nikulin (https://github.com/inikulin)",
"contributors": "https://github.com/inikulin/parse5/graphs/contributors",
"homepage": "https://github.com/inikulin/parse5",
@@ -17,8 +17,8 @@
"license": "MIT",
"main": "./lib/index.js",
"dependencies": {
- "parse5": "^5.1.1",
- "parse5-parser-stream": "^5.1.1"
+ "parse5": "^6.0.1",
+ "parse5-parser-stream": "^6.0.1"
},
"repository": {
"type": "git",
diff --git a/packages/parse5-sax-parser/docs/index.md b/packages/parse5-sax-parser/docs/index.md
index a89b295..a089a95 100644
--- a/packages/parse5-sax-parser/docs/index.md
+++ b/packages/parse5-sax-parser/docs/index.md
@@ -87,8 +87,8 @@ const fs = require('fs');
const file = fs.createWriteStream('google.com.html');
const parser = new SAXParser();
-parser.on('doctype', (name, publicId, systemId) => {
- // Process doctype info ans stop parsing
+parser.on('doctype', ({ name, publicId, systemId }) => {
+ // Process doctype info and stop parsing
...
parser.stop();
});
diff --git a/packages/parse5-sax-parser/package.json b/packages/parse5-sax-parser/package.json
index f8873fa..0924152 100644
--- a/packages/parse5-sax-parser/package.json
+++ b/packages/parse5-sax-parser/package.json
@@ -1,7 +1,7 @@
{
"name": "parse5-sax-parser",
"description": "Streaming SAX-style HTML parser.",
- "version": "5.1.1",
+ "version": "6.0.1",
"author": "Ivan Nikulin (https://github.com/inikulin)",
"contributors": "https://github.com/inikulin/parse5/graphs/contributors",
"homepage": "https://github.com/inikulin/parse5",
@@ -15,7 +15,7 @@
"license": "MIT",
"main": "./lib/index.js",
"dependencies": {
- "parse5": "^5.1.1"
+ "parse5": "^6.0.1"
},
"repository": {
"type": "git",
diff --git a/packages/parse5-serializer-stream/package.json b/packages/parse5-serializer-stream/package.json
index 00a6715..2c2948a 100644
--- a/packages/parse5-serializer-stream/package.json
+++ b/packages/parse5-serializer-stream/package.json
@@ -1,7 +1,7 @@
{
"name": "parse5-serializer-stream",
"description": "Streaming HTML serializer.",
- "version": "5.1.1",
+ "version": "6.0.1",
"author": "Ivan Nikulin (https://github.com/inikulin)",
"contributors": "https://github.com/inikulin/parse5/graphs/contributors",
"homepage": "https://github.com/inikulin/parse5",
@@ -16,7 +16,7 @@
"license": "MIT",
"main": "./lib/index.js",
"dependencies": {
- "parse5": "^5.1.1"
+ "parse5": "^6.0.1"
},
"repository": {
"type": "git",
diff --git a/packages/parse5/docs/source-code-location/end-location.md b/packages/parse5/docs/source-code-location/end-location.md
new file mode 100644
index 0000000..8b3bd57
--- /dev/null
+++ b/packages/parse5/docs/source-code-location/end-location.md
@@ -0,0 +1,48 @@
+# Interface: EndLocation
+
+### Properties
+
+* [endCol](#endcol)
+* [endOffset](#endoffset)
+* [endLine](#endline)
+* [endTag](#endtag)
+
+---
+
+## Properties
+
+
+
+### endCol
+
+**● endCol**: *`number`*
+
+One-based column index of the last character
+
+___
+
+
+### endOffset
+
+**● endOffset**: *`number`*
+
+Zero-based last character index
+
+___
+
+
+### endLine
+
+**● endLine**: *`number`*
+
+One-based line index of the last character
+
+___
+
+
+### endTag
+
+**● endTag**: *[Location](location.md)|undefined*
+
+Element's end tag location info.
+This property is undefined, if the element has no closing tag.
\ No newline at end of file
diff --git a/packages/parse5/docs/tree-adapter/interface.md b/packages/parse5/docs/tree-adapter/interface.md
index ac1e565..238fb19 100644
--- a/packages/parse5/docs/tree-adapter/interface.md
+++ b/packages/parse5/docs/tree-adapter/interface.md
@@ -2,7 +2,7 @@
Tree adapter is a set of utility functions that provides minimal required abstraction layer beetween parser and a specific AST format. Note that `TreeAdapter` is not designed to be a general purpose AST manipulation library. You can build such library on top of existing `TreeAdapter` or use one of the existing libraries from npm.
-*__See__*: [default implementation](https://github.com/inikulin/parse5/blob/master/lib/tree_adapters/default.js)
+*__See__*: [default implementation](https://github.com/inikulin/parse5/blob/master/packages/parse5/lib/tree-adapters/default.js)
### Methods
@@ -38,7 +38,7 @@ Tree adapter is a set of utility functions that provides minimal required abstra
* [setDocumentType](#setdocumenttype)
* [setNodeSourceCodeLocation](#setnodesourcecodelocation)
* [setTemplateContent](#settemplatecontent)
-
+* [updateNodeSourceCodeLocation](#updatenodesourcecodelocation)
---
## Methods
@@ -588,6 +588,21 @@ Sets the `<template>` element content element.
| contentElement | DocumentFragment | Content element. |
**Returns:** `void`
-
___
+
+
+### updateNodeSourceCodeLocation
+
+▸ **updateNodeSourceCodeLocation**(node: *Node*, endLocation: *[EndLocation](../source-code-location/end-location.md)*): `void`
+
+Updates the source code location of nodes.
+
+**Parameters:**
+| Param | Type | Description |
+| ------ | ------ | ------ |
+| node | Node | Node. |
+| endLocation | [EndLocation](../source-code-location/end-location.md) | Source code location information of the end of the node. |
+
+**Returns:** `void`
+___
diff --git a/packages/parse5/lib/extensions/location-info/parser-mixin.js b/packages/parse5/lib/extensions/location-info/parser-mixin.js
index fcf3a40..e7d3e2d 100644
--- a/packages/parse5/lib/extensions/location-info/parser-mixin.js
+++ b/packages/parse5/lib/extensions/location-info/parser-mixin.js
@@ -43,17 +43,19 @@ class LocationInfoParserMixin extends Mixin {
 // NOTE: For cases like <p> <p> </p> - First 'p' closes without a closing
 // tag and for cases like <td> <p> </td> - 'p' closes without a closing tag.
const isClosingEndTag = closingToken.type === Tokenizer.END_TAG_TOKEN && tn === closingToken.tagName;
-
+ const endLoc = {};
if (isClosingEndTag) {
- loc.endTag = Object.assign({}, ctLoc);
- loc.endLine = ctLoc.endLine;
- loc.endCol = ctLoc.endCol;
- loc.endOffset = ctLoc.endOffset;
+ endLoc.endTag = Object.assign({}, ctLoc);
+ endLoc.endLine = ctLoc.endLine;
+ endLoc.endCol = ctLoc.endCol;
+ endLoc.endOffset = ctLoc.endOffset;
} else {
- loc.endLine = ctLoc.startLine;
- loc.endCol = ctLoc.startCol;
- loc.endOffset = ctLoc.startOffset;
+ endLoc.endLine = ctLoc.startLine;
+ endLoc.endCol = ctLoc.startCol;
+ endLoc.endOffset = ctLoc.startOffset;
}
+
+ this.treeAdapter.updateNodeSourceCodeLocation(element, endLoc);
}
}
}
@@ -208,9 +210,8 @@ class LocationInfoParserMixin extends Mixin {
const tnLoc = this.treeAdapter.getNodeSourceCodeLocation(textNode);
if (tnLoc) {
- tnLoc.endLine = token.location.endLine;
- tnLoc.endCol = token.location.endCol;
- tnLoc.endOffset = token.location.endOffset;
+ const { endLine, endCol, endOffset } = token.location;
+ this.treeAdapter.updateNodeSourceCodeLocation(textNode, { endLine, endCol, endOffset });
} else {
this.treeAdapter.setNodeSourceCodeLocation(textNode, token.location);
}
diff --git a/packages/parse5/lib/parser/index.js b/packages/parse5/lib/parser/index.js
index 5eececd..45d3e83 100644
--- a/packages/parse5/lib/parser/index.js
+++ b/packages/parse5/lib/parser/index.js
@@ -325,15 +325,6 @@ class Parser {
this.treeAdapter = this.options.treeAdapter;
this.pendingScript = null;
- this.nodeInfo = {};
-
- if(this.options.componentValidator){
- this.validator = this.options.componentValidator;
- }
-
- if(this.options.compileResult){
- this.compileResult = this.options.compileResult;
- }
if (this.options.sourceCodeLocationInfo) {
Mixin.install(this, LocationInfoParserMixin);
@@ -425,32 +416,33 @@ class Parser {
//Parsing loop
_runParsingLoop(scriptHandler) {
- let lastToken = {};
while (!this.stopped) {
- this._setupTokenizerCDATAMode();
- const token = this.tokenizer.getNextToken();
- if (token.type === Tokenizer.HIBERNATION_TOKEN) {
- break;
- }
- if (token.type !== Tokenizer.EOF_TOKEN && token.type !== Tokenizer.WHITESPACE_CHARACTER_TOKEN) {
- lastToken =token;
- }
- checkselfClosingNode(this, token);
- if (this.skipNextNewLine) {
- this.skipNextNewLine = false;
- if (token.type === Tokenizer.WHITESPACE_CHARACTER_TOKEN && token.chars[0] === '\n') {
- if (token.chars.length === 1) {
- continue;
- }
- token.chars = token.chars.substr(1);
+ this._setupTokenizerCDATAMode();
+
+ const token = this.tokenizer.getNextToken();
+
+ if (token.type === Tokenizer.HIBERNATION_TOKEN) {
+ break;
+ }
+
+ if (this.skipNextNewLine) {
+ this.skipNextNewLine = false;
+
+ if (token.type === Tokenizer.WHITESPACE_CHARACTER_TOKEN && token.chars[0] === '\n') {
+ if (token.chars.length === 1) {
+ continue;
+ }
+
+ token.chars = token.chars.substr(1);
+ }
+ }
+
+ this._processInputToken(token);
+
+ if (scriptHandler && this.pendingScript) {
+ break;
}
- }
- this._processInputToken(token);
- if (scriptHandler && this.pendingScript) {
- break;
- }
}
- checkInvalid(this, lastToken);
}
runParsingLoopForCurrentChunk(writeCallback, scriptHandler) {
@@ -890,70 +882,6 @@ class Parser {
}
}
-/**
- * Check if the node is self closing.
- * @param {Object} parse parse5 object.
- * @param {Object} token Hml text token information.
- */
-function checkselfClosingNode(parse, token) {
- const tagName = (token.tagName || "").toLowerCase();
- const selfClosing = token.selfClosing;
- const flag = parse.validator.isSupportedSelfClosing(tagName);
- if (parse.nodeInfo.tn && tagName && !parse.nodeInfo.sc) {
- const loc =
- String(token.location.startLine) + String(token.location.startCol);
- if (
- !flag ||
- (loc !== parse.nodeInfo.pos && token.type === Tokenizer.START_TAG_TOKEN)
- ) {
- parse.compileResult.log.push({
- line: String(token.location.startLine) || 1,
- column: String(token.location.startCol) || 1,
- reason: 'ERROR: tag `' + parse.nodeInfo.tn + '` must be closed, please follow norm',
- });
- parse.nodeInfo = {};
- }
- }
- if (tagName && flag) {
- if (token.type === Tokenizer.START_TAG_TOKEN && !selfClosing) {
- parse.nodeInfo.tn = tagName;
- parse.nodeInfo.sc = false;
- parse.nodeInfo.pos =
- String(token.location.line) + String(token.location.col);
- }
- if (
- token.type === Tokenizer.END_TAG_TOKEN &&
- tagName === parse.nodeInfo.tn
- ) {
- parse.nodeInfo.sc = true;
- }
- }
- if (!flag && selfClosing && token.type === Tokenizer.START_TAG_TOKEN) {
- parse.compileResult.log.push({
- line: token.location.startLine || 1,
- column: token.location.startCol || 1,
- reason: "ERROR: tag `" + tagName + "` can not use selfClosing",
- });
- }
-}
-
-/**
- * Check if the html text is legal.
- * @param {Object} lastToken Hml text last token information.
- */
-function checkInvalid(lastToken) {
- if (
- lastToken.type && lastToken.type !== Tokenizer.END_TAG_TOKEN &&
- lastToken.type !== Tokenizer.COMMENT_TOKEN
- ) {
- compileResult.log.push({
- line: lastToken.location.startLine || 1,
- column: lastToken.location.startCol || 1,
- reason: "ERROR: hml content is invalid. Please check it.",
- });
- }
-}
-
module.exports = Parser;
//Adoption agency algorithm
@@ -1618,7 +1546,7 @@ function hrStartTagInBody(p, token) {
p._appendElement(token, NS.HTML);
p.framesetOk = false;
- p.ackSelfClosing = true;
+ token.ackSelfClosing = true;
}
function imageStartTagInBody(p, token) {
diff --git a/packages/parse5/lib/tree-adapters/default.js b/packages/parse5/lib/tree-adapters/default.js
index 7935a0e..14d007a 100644
--- a/packages/parse5/lib/tree-adapters/default.js
+++ b/packages/parse5/lib/tree-adapters/default.js
@@ -215,3 +215,7 @@ exports.setNodeSourceCodeLocation = function(node, location) {
exports.getNodeSourceCodeLocation = function(node) {
return node.sourceCodeLocation;
};
+
+exports.updateNodeSourceCodeLocation = function(node, endLocation) {
+ node.sourceCodeLocation = Object.assign(node.sourceCodeLocation, endLocation); // copies endLocation's fields onto the node's existing location object
+};
diff --git a/packages/parse5/package.json b/packages/parse5/package.json
index 7e93473..13a12df 100644
--- a/packages/parse5/package.json
+++ b/packages/parse5/package.json
@@ -1,7 +1,7 @@
{
"name": "parse5",
"description": "HTML parser and serializer.",
- "version": "5.1.1",
+ "version": "6.0.1",
"author": "Ivan Nikulin (https://github.com/inikulin)",
"contributors": "https://github.com/inikulin/parse5/graphs/contributors",
"homepage": "https://github.com/inikulin/parse5",
diff --git a/packages/parse5/test/location-info-parser.test.js b/packages/parse5/test/location-info-parser.test.js
index 22f474c..4d4ce68 100644
--- a/packages/parse5/test/location-info-parser.test.js
+++ b/packages/parse5/test/location-info-parser.test.js
@@ -7,7 +7,7 @@ const {
assertStartTagLocation,
assertNodeLocation
} = require('../../../test/utils/generate-location-info-parser-tests');
-const { generateTestsForEachTreeAdapter } = require('../../../test/utils/common');
+const { generateTestsForEachTreeAdapter, treeAdapters } = require('../../../test/utils/common');
generateLocationInfoParserTests(module.exports, 'Parser', (input, opts) => ({
node: parse5.parse(input, opts)
@@ -127,3 +127,56 @@ generateTestsForEachTreeAdapter(module.exports, (_test, treeAdapter) => {
assert.ok(!location.endTag);
};
});
+
+exports['Updating node source code location (GH-314)'] = function() {
+ const sourceCodeLocationSetter = {
+ setNodeSourceCodeLocation(node, location) {
+ if (location === null) {
+ node.sourceCodeLocation = null;
+ } else {
+ node.sourceCodeLocation = {
+ start: {
+ line: location.startLine,
+ column: location.startCol,
+ offset: location.startOffset
+ },
+ end: {
+ line: location.endLine,
+ column: location.endCol,
+ offset: location.endOffset
+ }
+ };
+ }
+ },
+ updateNodeSourceCodeLocation(node, endLocation) {
+ node.sourceCodeLocation = {
+ start: node.sourceCodeLocation.start,
+ end: {
+ line: endLocation.endLine,
+ column: endLocation.endCol,
+ offset: endLocation.endOffset
+ }
+ };
+ }
+ };
+ const adapter = Object.assign(treeAdapters.default, sourceCodeLocationSetter);
+ const document = parse5.parse('Testing location', { adapter, sourceCodeLocationInfo: true });
+ const [doctype, html] = document.childNodes;
+ const [head, body] = html.childNodes;
+ const [text] = body.childNodes;
+
+ assert.deepEqual(doctype.sourceCodeLocation, {
+ start: { line: 1, column: 1, offset: 0 },
+ end: { line: 1, column: 11, offset: 10 }
+ });
+ assert.strictEqual(html.sourceCodeLocation, null);
+ assert.strictEqual(head.sourceCodeLocation, null);
+ assert.deepEqual(body.sourceCodeLocation, {
+ start: { line: 1, column: 11, offset: 10 },
+ end: { line: 1, column: 40, offset: 39 }
+ });
+ assert.deepEqual(text.sourceCodeLocation, {
+ start: { line: 1, column: 17, offset: 16 },
+ end: { line: 1, column: 33, offset: 32 }
+ });
+};
--
Gitee
From 79a9d0a7b4106d5092765adef13c828a7223a5a4 Mon Sep 17 00:00:00 2001
From: bojiang
Date: Wed, 24 Aug 2022 15:38:16 +0800
Subject: [PATCH 2/6] jiangbo91@huawei.com
Signed-off-by: bojiang
---
packages/parse5/lib/parser/index.js | 82 ++++++++++++++++++++++++++++-
1 file changed, 80 insertions(+), 2 deletions(-)
diff --git a/packages/parse5/lib/parser/index.js b/packages/parse5/lib/parser/index.js
index 45d3e83..4da4fd1 100644
--- a/packages/parse5/lib/parser/index.js
+++ b/packages/parse5/lib/parser/index.js
@@ -325,6 +325,15 @@ class Parser {
this.treeAdapter = this.options.treeAdapter;
this.pendingScript = null;
+ this.nodeInfo = {};
+
+ if(this.options.componentValidator){
+ this.validator = this.options.componentValidator;
+ }
+
+ if(this.options.compileResult){
+ this.compileResult = this.options.compileResult;
+ }
if (this.options.sourceCodeLocationInfo) {
Mixin.install(this, LocationInfoParserMixin);
@@ -416,6 +425,7 @@ class Parser {
//Parsing loop
_runParsingLoop(scriptHandler) {
+ let lastToken = {};
while (!this.stopped) {
this._setupTokenizerCDATAMode();
@@ -424,7 +434,10 @@ class Parser {
if (token.type === Tokenizer.HIBERNATION_TOKEN) {
break;
}
-
+ if (token.type !== Tokenizer.EOF_TOKEN && token.type !== Tokenizer.WHITESPACE_CHARACTER_TOKEN) {
+ lastToken =token;
+ }
+ checkselfClosingNode(this, token);
if (this.skipNextNewLine) {
this.skipNextNewLine = false;
@@ -443,6 +456,7 @@ class Parser {
break;
}
}
+ checkInvalid(this, lastToken);
}
runParsingLoopForCurrentChunk(writeCallback, scriptHandler) {
@@ -882,6 +896,70 @@ class Parser {
}
}
+/**
+ * Check if the node is self closing.
+ * @param {Object} parse parse5 object.
+ * @param {Object} token Hml text token information.
+ */
+ function checkselfClosingNode(parse, token) {
+ const tagName = (token.tagName || "").toLowerCase();
+ const selfClosing = token.selfClosing;
+ const flag = parse.validator.isSupportedSelfClosing(tagName);
+ if (parse.nodeInfo.tn && tagName && !parse.nodeInfo.sc) {
+ const loc =
+ String(token.location.startLine) + String(token.location.startCol);
+ if (
+ !flag ||
+ (loc !== parse.nodeInfo.pos && token.type === Tokenizer.START_TAG_TOKEN)
+ ) {
+ parse.compileResult.log.push({
+ line: String(token.location.startLine) || 1,
+ column: String(token.location.startCol) || 1,
+ reason: 'ERROR: tag `' + parse.nodeInfo.tn + '` must be closed, please follow norm',
+ });
+ parse.nodeInfo = {};
+ }
+ }
+ if (tagName && flag) {
+ if (token.type === Tokenizer.START_TAG_TOKEN && !selfClosing) {
+ parse.nodeInfo.tn = tagName;
+ parse.nodeInfo.sc = false;
+ parse.nodeInfo.pos =
+ String(token.location.line) + String(token.location.col);
+ }
+ if (
+ token.type === Tokenizer.END_TAG_TOKEN &&
+ tagName === parse.nodeInfo.tn
+ ) {
+ parse.nodeInfo.sc = true;
+ }
+ }
+ if (!flag && selfClosing && token.type === Tokenizer.START_TAG_TOKEN) {
+ parse.compileResult.log.push({
+ line: token.location.startLine || 1,
+ column: token.location.startCol || 1,
+ reason: "ERROR: tag `" + tagName + "` can not use selfClosing",
+ });
+ }
+ }
+
+ /**
+ * Check if the html text is legal.
+ * @param {Object} lastToken Hml text last token information.
+ */
+ function checkInvalid(lastToken) {
+ if (
+ lastToken.type && lastToken.type !== Tokenizer.END_TAG_TOKEN &&
+ lastToken.type !== Tokenizer.COMMENT_TOKEN
+ ) {
+ compileResult.log.push({
+ line: lastToken.location.startLine || 1,
+ column: lastToken.location.startCol || 1,
+ reason: "ERROR: hml content is invalid. Please check it.",
+ });
+ }
+}
+
module.exports = Parser;
//Adoption agency algorithm
@@ -1546,7 +1624,7 @@ function hrStartTagInBody(p, token) {
p._appendElement(token, NS.HTML);
p.framesetOk = false;
- token.ackSelfClosing = true;
+ p.ackSelfClosing = true;
}
function imageStartTagInBody(p, token) {
--
Gitee
From b1681e6c7fce0a675379606777050fb92de02290 Mon Sep 17 00:00:00 2001
From: bojiang
Date: Wed, 24 Aug 2022 16:26:35 +0800
Subject: [PATCH 3/6] jiangbo91@huawei.com
Signed-off-by: bojiang
---
README.OpenSource | 11 ++++++
README.en.md | 25 ++++++++++++++
README.md | 52 +++++++++++------------------
packages/parse5/lib/parser/index.js | 2 +-
4 files changed, 57 insertions(+), 33 deletions(-)
create mode 100644 README.OpenSource
create mode 100644 README.en.md
diff --git a/README.OpenSource b/README.OpenSource
new file mode 100644
index 0000000..82c6c7a
--- /dev/null
+++ b/README.OpenSource
@@ -0,0 +1,11 @@
+[
+ {
+ "Name": "parse5",
+ "License": "MIT",
+ "License File": "NOTICE",
+ "Version Number": " 6.0.1",
+ "Owner": "sunbingxin@huawei.com",
+ "Upstream URL": "https://github.com/inikulin/parse5.git",
+ "Description": "HTML parser and serializer."
+ }
+]
diff --git a/README.en.md b/README.en.md
new file mode 100644
index 0000000..ff43d00
--- /dev/null
+++ b/README.en.md
@@ -0,0 +1,25 @@
+# third_party_parse5
+
+#### Description
+HTML parser and serializer.
+
+#### License
+Copyright (c) 2013-2019 Ivan Nikulin (ifaaan@gmail.com, https://github.com/inikulin)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
index c91e9ab..77f07f3 100644
--- a/README.md
+++ b/README.md
@@ -1,37 +1,25 @@
-
-
-
-
-
+# third_party_parse5
-
-HTML parsing/serialization toolset for Node.js. WHATWG HTML Living Standard (aka HTML5)-compliant.
-
+#### 介绍
+HTML parser and serializer.
-
-
-
-
-
-
+#### License
+Copyright (c) 2013-2019 Ivan Nikulin (ifaaan@gmail.com, https://github.com/inikulin)
-
-parse5 provides nearly everything you may need when dealing with HTML. It's the fastest spec-compliant HTML parser
-for Node to date. It parses HTML the way the latest version of your browser does. It has proven itself reliable in such projects
-as jsdom, Angular2, Polymer and many more.
-
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
----
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
-
- List of parse5 toolset packages
-
-
-
- Online playground
-
-
-
- Version history
-
-
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
\ No newline at end of file
diff --git a/packages/parse5/lib/parser/index.js b/packages/parse5/lib/parser/index.js
index 4da4fd1..7e98f17 100644
--- a/packages/parse5/lib/parser/index.js
+++ b/packages/parse5/lib/parser/index.js
@@ -1624,7 +1624,7 @@ function hrStartTagInBody(p, token) {
p._appendElement(token, NS.HTML);
p.framesetOk = false;
- p.ackSelfClosing = true;
+ token.ackSelfClosing = true;
}
function imageStartTagInBody(p, token) {
--
Gitee
From e0389facee5544ed62619779c6cdab6b0747da08 Mon Sep 17 00:00:00 2001
From: bojiang
Date: Wed, 24 Aug 2022 16:46:47 +0800
Subject: [PATCH 4/6] jiangbo91@huawei.com
Signed-off-by: bojiang
---
README.OpenSource | 11 -----------
1 file changed, 11 deletions(-)
delete mode 100644 README.OpenSource
diff --git a/README.OpenSource b/README.OpenSource
deleted file mode 100644
index 82c6c7a..0000000
--- a/README.OpenSource
+++ /dev/null
@@ -1,11 +0,0 @@
-[
- {
- "Name": "parse5",
- "License": "MIT",
- "License File": "NOTICE",
- "Version Number": " 6.0.1",
- "Owner": "sunbingxin@huawei.com",
- "Upstream URL": "https://github.com/inikulin/parse5.git",
- "Description": "HTML parser and serializer."
- }
-]
--
Gitee
From da5634ade94ae00204b0281750a44976d56e6ace Mon Sep 17 00:00:00 2001
From: bojiang
Date: Wed, 24 Aug 2022 17:25:36 +0800
Subject: [PATCH 5/6] jiangbo91@huawei.com
Signed-off-by: bojiang
---
.prettierignore | 2 --
.prettierrc | 3 ---
.travis.yml | 4 ----
README.OpenSource | 11 +++++++++++
packages/parse5/lib/parser/index.js | 2 +-
5 files changed, 12 insertions(+), 10 deletions(-)
delete mode 100644 .prettierignore
delete mode 100644 .prettierrc
delete mode 100644 .travis.yml
create mode 100644 README.OpenSource
diff --git a/.prettierignore b/.prettierignore
deleted file mode 100644
index f93620d..0000000
--- a/.prettierignore
+++ /dev/null
@@ -1,2 +0,0 @@
-packages/parse5/lib/tokenizer/named-entity-data.js
-docs
diff --git a/.prettierrc b/.prettierrc
deleted file mode 100644
index ca3f154..0000000
--- a/.prettierrc
+++ /dev/null
@@ -1,3 +0,0 @@
-printWidth: 120
-tabWidth: 4
-singleQuote: true
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 68bfbf7..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,4 +0,0 @@
-language: node_js
-sudo: false
-node_js:
- - stable
diff --git a/README.OpenSource b/README.OpenSource
new file mode 100644
index 0000000..82c6c7a
--- /dev/null
+++ b/README.OpenSource
@@ -0,0 +1,11 @@
+[
+ {
+ "Name": "parse5",
+ "License": "MIT",
+ "License File": "NOTICE",
+ "Version Number": "6.0.1",
+ "Owner": "sunbingxin@huawei.com",
+ "Upstream URL": "https://github.com/inikulin/parse5.git",
+ "Description": "HTML parser and serializer."
+ }
+]
diff --git a/packages/parse5/lib/parser/index.js b/packages/parse5/lib/parser/index.js
index 7e98f17..dd2dd28 100644
--- a/packages/parse5/lib/parser/index.js
+++ b/packages/parse5/lib/parser/index.js
@@ -959,7 +959,7 @@ class Parser {
});
}
}
-
+
module.exports = Parser;
//Adoption agency algorithm
--
Gitee
From 530892f70e5d9b85a58a8ffa4adf0451bc0d180b Mon Sep 17 00:00:00 2001
From: Bo Jiang
Date: Thu, 25 Aug 2022 01:23:28 +0000
Subject: [PATCH 6/6] update packages/parse5/lib/parser/index.js.
Signed-off-by: Bo Jiang
---
packages/parse5/lib/parser/index.js | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/packages/parse5/lib/parser/index.js b/packages/parse5/lib/parser/index.js
index dd2dd28..8ac84a7 100644
--- a/packages/parse5/lib/parser/index.js
+++ b/packages/parse5/lib/parser/index.js
@@ -1624,7 +1624,7 @@ function hrStartTagInBody(p, token) {
p._appendElement(token, NS.HTML);
p.framesetOk = false;
- token.ackSelfClosing = true;
+ p.ackSelfClosing = true;
}
function imageStartTagInBody(p, token) {
--
Gitee