diff --git a/README.OpenSource b/README.OpenSource
index dd3ed57bebfe132c692c6fc9c0c2419153737638..2852d965038ff935767d67ddec06ac4538875ea5 100644
--- a/README.OpenSource
+++ b/README.OpenSource
@@ -3,7 +3,7 @@
"Name": "parse5",
"License": "MIT",
"License File": "NOTICE",
- "Version Number": " 6.0.1",
+ "Version Number": "7.0.0",
"Owner": "sunbingxin@huawei.com",
"Upstream URL": "https://github.com/inikulin/parse5.git",
"Description": "HTML parser and serializer."
diff --git a/bench/.eslintrc.js b/bench/.eslintrc.js
deleted file mode 100644
index 401a667b5f7c560961dbdd5af01e65c9c300057c..0000000000000000000000000000000000000000
--- a/bench/.eslintrc.js
+++ /dev/null
@@ -1,9 +0,0 @@
-module.exports = {
- extends: ['../.eslintrc.js'],
- rules: {
- 'no-console': 'off'
- },
- parserOptions: {
- ecmaVersion: 8
- }
-};
diff --git a/bench/.eslintrc.json b/bench/.eslintrc.json
new file mode 100644
index 0000000000000000000000000000000000000000..1cb404c3c102ba186ee167c09c3d124a98054877
--- /dev/null
+++ b/bench/.eslintrc.json
@@ -0,0 +1,6 @@
+{
+ "extends": ["../.eslintrc.json"],
+ "rules": {
+ "no-console": "off"
+ }
+}
diff --git a/bench/memory/named-entity-data.js b/bench/memory/named-entity-data.js
deleted file mode 100644
index bca60f005a04f5589fd9c9d6c7b676be9af99bb9..0000000000000000000000000000000000000000
--- a/bench/memory/named-entity-data.js
+++ /dev/null
@@ -1,15 +0,0 @@
-'use strict';
-
-const format = require('human-format');
-
-main();
-
-function main() {
- const before = process.memoryUsage().rss;
-
- require('../../packages/parse5/lib/tokenizer/named-entity-data');
-
- const after = process.memoryUsage().rss;
-
- console.log('Initial memory consumption: ', format(after - before, { unit: 'B' }));
-}
diff --git a/bench/memory/sax-parser.js b/bench/memory/sax-parser.js
index d1e893cf4b5db2c47585fad704b15253df90ed73..0460035ee571736916b2f6bdfd1aafd0102a760b 100644
--- a/bench/memory/sax-parser.js
+++ b/bench/memory/sax-parser.js
@@ -1,43 +1,36 @@
-'use strict';
-
-const fs = require('fs');
-const format = require('human-format');
-const promisifyEvent = require('promisify-event');
-const memwatch = require('node-memwatch');
-const SAXParser = require('../../packages/parse5-sax-parser/lib');
+import { readFile } from 'node:fs/promises';
+import format from 'human-format';
+import memwatch from '@airbnb/node-memwatch';
+import { SAXParser } from '../../packages/parse5-sax-parser/dist/index.js';
+import { finished } from 'parse5-test-utils/dist/common.js';
main();
async function main() {
- let parsedDataSize = 0;
- let maxMemUsage = 0;
- let startDate = null;
- let endDate = null;
const heapDiffMeasurement = new memwatch.HeapDiff();
- let heapDiff = null;
- memwatch.on('stats', stats => {
- maxMemUsage = Math.max(maxMemUsage, stats['current_base']);
+ let maxMemUsage = 0;
+
+ memwatch.on('stats', (stats) => {
+ maxMemUsage = Math.max(maxMemUsage, stats.used_heap_size);
});
- startDate = new Date();
+ const statsPromise = new Promise((resolve) => memwatch.once('stats', resolve));
- const parserPromise = parse().then(dataSize => {
- parsedDataSize = dataSize;
- endDate = new Date();
- heapDiff = heapDiffMeasurement.end();
- });
+ const startDate = new Date();
+
+ const parsedDataSize = await parse();
+ const endDate = new Date();
+ const heapDiff = heapDiffMeasurement.end();
- await Promise.all([
- parserPromise,
- promisifyEvent(memwatch, 'stats') // NOTE: we need at least one `stats` result
- ]);
+ // NOTE: we need at least one `stats` result to get maxMemUsage
+ await statsPromise;
printResults(parsedDataSize, startDate, endDate, heapDiff, maxMemUsage);
}
async function parse() {
- const data = fs.readFileSync('../test/data/huge-page/huge-page.html', 'utf8');
+ const data = await readFile(new URL('../../test/data/huge-page/huge-page.html', import.meta.url), 'utf8');
let parsedDataSize = 0;
const stream = new SAXParser();
@@ -48,7 +41,7 @@ async function parse() {
stream.end();
- await promisifyEvent(stream, 'finish');
+ await finished(stream);
return parsedDataSize;
}
@@ -57,16 +50,16 @@ function getDuration(startDate, endDate) {
const scale = new format.Scale({
seconds: 1,
minutes: 60,
- hours: 3600
+ hours: 3600,
});
- return format((endDate - startDate) / 1000, { scale: scale });
+ return format((endDate - startDate) / 1000, { scale });
}
function printResults(parsedDataSize, startDate, endDate, heapDiff, maxMemUsage) {
console.log('Input data size:', format(parsedDataSize, { unit: 'B' }));
- console.log('Duration: ', getDuration(startDate, endDate));
- console.log('Memory before: ', heapDiff.before.size);
- console.log('Memory after: ', heapDiff.after.size);
- console.log('Memory max: ', format(maxMemUsage, { unit: 'B' }));
+ console.log('Duration:', getDuration(startDate, endDate));
+ console.log('Memory before:', heapDiff.before.size);
+ console.log('Memory after:', heapDiff.after.size);
+ console.log('Memory max:', format(maxMemUsage, { unit: 'B' }));
}
diff --git a/bench/package.json b/bench/package.json
index dbb9794b2e40418da86858c044361d61c7ff5165..cd8b234aaea5800b184f6bd1e53f95dae98dd0b6 100644
--- a/bench/package.json
+++ b/bench/package.json
@@ -1,14 +1,15 @@
{
"name": "parse5-benchmarks",
+ "private": true,
+ "type": "module",
"version": "1.0.0",
"description": "parse5 regression benchmarks",
"author": "Ivan Nikulin ",
"license": "MIT",
"dependencies": {
"benchmark": "^2.1.4",
- "human-format": "^0.7.0",
- "node-memwatch": "^1.0.1",
- "parse5": "*",
- "promisify-event": "^1.0.0"
+ "human-format": "^1.0.0",
+ "@airbnb/node-memwatch": "^2.0.0",
+ "parse5": "npm:parse5"
}
}
diff --git a/bench/perf/index.js b/bench/perf/index.js
index ae87e04963159d50dadabab497a5d6b896e5f7b0..2306eabe056c9755def2567a33bedf4d16421fb1 100644
--- a/bench/perf/index.js
+++ b/bench/perf/index.js
@@ -1,37 +1,38 @@
-'use strict';
-
-const { join } = require('path');
-const { readFileSync, createReadStream, readdirSync } = require('fs');
-const Benchmark = require('benchmark');
-const { loadTreeConstructionTestData } = require('../../test/utils/generate-parsing-tests');
-const loadSAXParserTestData = require('../../test/utils/load-sax-parser-test-data');
-const { treeAdapters, WritableStreamStub } = require('../../test/utils/common');
+import { readFileSync, createReadStream, readdirSync } from 'node:fs';
+import Benchmark from 'benchmark';
+import { loadTreeConstructionTestData } from 'parse5-test-utils/dist/generate-parsing-tests.js';
+import { loadSAXParserTestData } from 'parse5-test-utils/dist/load-sax-parser-test-data.js';
+import { treeAdapters, WritableStreamStub, finished } from 'parse5-test-utils/dist/common.js';
+import * as parse5 from '../../packages/parse5/dist/index.js';
+import { ParserStream as parse5Stream } from '../../packages/parse5-parser-stream/dist/index.js';
+import * as parse5Upstream from 'parse5';
+
+const hugePagePath = new URL('../../test/data/huge-page/huge-page.html', import.meta.url);
+const treeConstructionPath = new URL('../../test/data/html5lib-tests/tree-construction', import.meta.url);
+const saxPath = new URL('../../test/data/sax/', import.meta.url);
//HACK: https://github.com/bestiejs/benchmark.js/issues/51
/* global workingCopy, WorkingCopyParserStream, upstreamParser, hugePage, microTests, runMicro, runPages, files */
-global.workingCopy = require('../../packages/parse5/lib');
-global.WorkingCopyParserStream = require('../../packages/parse5-parser-stream/lib');
-global.upstreamParser = require('parse5');
+global.workingCopy = parse5;
+global.WorkingCopyParserStream = parse5Stream;
+global.upstreamParser = parse5Upstream;
// Huge page data
-global.hugePage = readFileSync(join(__dirname, '../../test/data/huge-page/huge-page.html')).toString();
+global.hugePage = readFileSync(hugePagePath).toString();
// Micro data
-global.microTests = loadTreeConstructionTestData(
- [join(__dirname, '../../test/data/html5lib-tests/tree-construction')],
- treeAdapters.default
-)
+global.microTests = loadTreeConstructionTestData(treeConstructionPath, treeAdapters.default)
.filter(
- test =>
- //NOTE: this test caused stack overflow in parse5 v1.x
+ (test) =>
+ //NOTE: this test caused a stack overflow in parse5 v1.x
test.input !== '
- Version history
+ Changelog
diff --git a/packages/parse5-plain-text-conversion-stream/docs/index.md b/packages/parse5-plain-text-conversion-stream/docs/index.md
deleted file mode 100644
index 9df2f6399393f292f0669a51e4623cece60af65a..0000000000000000000000000000000000000000
--- a/packages/parse5-plain-text-conversion-stream/docs/index.md
+++ /dev/null
@@ -1,63 +0,0 @@
-# Class: PlainTextConversionStream
-
-Converts plain text files into HTML document as required by [HTML specification](https://html.spec.whatwg.org/#read-text). A [writable stream](https://nodejs.org/api/stream.html#stream_class_stream_writable).
-
-*__example__*:
-
-```js
-const PlainTextConversionStream = require('parse5-plain-text-conversion-stream');
-const fs = require('fs');
-
-const file = fs.createReadStream('war_and_peace.txt');
-const converter = new PlainTextConversionStream();
-
-converter.once('finish', () => {
- console.log(converter.document.childNodes[1].childNodes[0].tagName); //> 'head'
-});
-
-file.pipe(converter);
-```
-
-### Constructors
-
-* [constructor](#constructor)
-
-### Properties
-
-* [document](#document)
-
-### Methods and events
-
-See: [writable stream API](https://nodejs.org/api/stream.html#stream_class_stream_writable).
-
----
-
-## Constructors
-
-
-
-### constructor
-
-⊕ **new PlainTextConversionStream**(options?: *[ParserOptions](../../parse5/parser-options.md)*): [PlainTextConversionStream]()
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| `Optional` options | [ParserOptions](../../parse5/parser-options.md) | Parsing options. |
-
-**Returns:** [PlainTextConversionStream]()
-
-___
-
-## Properties
-
-
-
-### document
-
-**● document**: *Document*
-
-The resulting document node.
-
-___
diff --git a/packages/parse5-plain-text-conversion-stream/lib/index.js b/packages/parse5-plain-text-conversion-stream/lib/index.js
deleted file mode 100644
index 46b39c5c80c848dfcd25f5b043f1fec4158996db..0000000000000000000000000000000000000000
--- a/packages/parse5-plain-text-conversion-stream/lib/index.js
+++ /dev/null
@@ -1,21 +0,0 @@
-'use strict';
-
-const ParserStream = require('parse5-parser-stream');
-const $ = require('parse5/lib/common/html').TAG_NAMES;
-
-class PlainTextConversionStream extends ParserStream {
- constructor(options) {
- super(options);
-
- // NOTE: see https://html.spec.whatwg.org/#read-text
- this.parser._insertFakeElement($.HTML);
- this.parser._insertFakeElement($.HEAD);
- this.parser.openElements.pop();
- this.parser._insertFakeElement($.BODY);
- this.parser._insertFakeElement($.PRE);
- this.parser.treeAdapter.insertText(this.parser.openElements.current, '\n');
- this.parser.switchToPlaintextParsing();
- }
-}
-
-module.exports = PlainTextConversionStream;
diff --git a/packages/parse5-plain-text-conversion-stream/lib/index.ts b/packages/parse5-plain-text-conversion-stream/lib/index.ts
new file mode 100644
index 0000000000000000000000000000000000000000..f07946e6ca2c081c43c3415f1fe269d400cde856
--- /dev/null
+++ b/packages/parse5-plain-text-conversion-stream/lib/index.ts
@@ -0,0 +1,40 @@
+import { type ParserOptions, type TreeAdapterTypeMap, html } from 'parse5';
+import { ParserStream } from 'parse5-parser-stream';
+
+const { TAG_ID: $, TAG_NAMES: TN } = html;
+
+/**
+ * Converts plain text files into an HTML document as required by the [HTML specification](https://html.spec.whatwg.org/#read-text).
+ * A [writable stream](https://nodejs.org/api/stream.html#stream_class_stream_writable).
+ *
+ * @example
+ *
+ * ```js
+ * import { PlainTextConversionStream } from 'parse5-plain-text-conversion-stream';
+ * import * as fs from 'node:fs';
+ * import { finished } from 'node:stream';
+ *
+ * const file = fs.createReadStream('war_and_peace.txt');
+ * const converter = new PlainTextConversionStream();
+ *
+ * finished(converter, () => {
+ * console.log(converter.document.childNodes[1].childNodes[0].tagName); //> 'head'
+ * });
+ *
+ * file.pipe(converter);
+ * ```
+ */
+export class PlainTextConversionStream extends ParserStream {
+ constructor(options?: ParserOptions) {
+ super(options);
+
+ // NOTE: inserts fake html, head, body and pre elements, adds a leading '\n' text and switches the parser to plaintext parsing; see https://html.spec.whatwg.org/#read-text
+ this.parser._insertFakeElement(TN.HTML, $.HTML);
+ this.parser._insertFakeElement(TN.HEAD, $.HEAD);
+ this.parser.openElements.pop();
+ this.parser._insertFakeElement(TN.BODY, $.BODY);
+ this.parser._insertFakeElement(TN.PRE, $.PRE);
+ this.parser.treeAdapter.insertText(this.parser.openElements.current, '\n');
+ this.parser.switchToPlaintextParsing();
+ }
+}
diff --git a/packages/parse5-plain-text-conversion-stream/package.json b/packages/parse5-plain-text-conversion-stream/package.json
index 974a0d13f031f4f85e128c4ef6fb2d1f6476b4bb..9104723447bc4ea366d8f7db8c2dea9204371408 100644
--- a/packages/parse5-plain-text-conversion-stream/package.json
+++ b/packages/parse5-plain-text-conversion-stream/package.json
@@ -1,10 +1,12 @@
{
"name": "parse5-plain-text-conversion-stream",
+ "type": "module",
"description": "Stream that converts plain text files into HTML document.",
- "version": "6.0.1",
+ "version": "7.0.0",
"author": "Ivan Nikulin (https://github.com/inikulin)",
"contributors": "https://github.com/inikulin/parse5/graphs/contributors",
"homepage": "https://github.com/inikulin/parse5",
+ "funding": "https://github.com/inikulin/parse5?sponsor=1",
"keywords": [
"parse5",
"parser",
@@ -15,16 +17,20 @@
"plain text"
],
"license": "MIT",
- "main": "./lib/index.js",
+ "main": "dist/index.js",
+ "module": "dist/index.js",
+ "types": "dist/index.d.ts",
+ "exports": "./dist/index.js",
"dependencies": {
- "parse5": "^6.0.1",
- "parse5-parser-stream": "^6.0.1"
+ "parse5": "^7.0.0",
+ "parse5-parser-stream": "^7.0.0"
},
"repository": {
"type": "git",
"url": "git://github.com/inikulin/parse5.git"
},
"files": [
- "lib"
+ "dist",
+ "!*.map"
]
}
diff --git a/packages/parse5-plain-text-conversion-stream/test/plain-text-conversion-stream.test.js b/packages/parse5-plain-text-conversion-stream/test/plain-text-conversion-stream.test.ts
similarity index 35%
rename from packages/parse5-plain-text-conversion-stream/test/plain-text-conversion-stream.test.js
rename to packages/parse5-plain-text-conversion-stream/test/plain-text-conversion-stream.test.ts
index 47a981c4d9fb6ea974de65783d9abc29854d8db0..da8e28201486bb58eb8cbfe6081bb10784c2c538 100644
--- a/packages/parse5-plain-text-conversion-stream/test/plain-text-conversion-stream.test.js
+++ b/packages/parse5-plain-text-conversion-stream/test/plain-text-conversion-stream.test.ts
@@ -1,31 +1,31 @@
-'use strict';
+import * as assert from 'node:assert';
+import { serialize } from 'parse5';
+import { PlainTextConversionStream } from '../lib/index.js';
+import { generateTestsForEachTreeAdapter } from 'parse5-test-utils/utils/common.js';
-const assert = require('assert');
-const parse5 = require('parse5');
-const PlainTextConversionStream = require('../lib');
-const { generateTestsForEachTreeAdapter } = require('../../../test/utils/common');
-
-generateTestsForEachTreeAdapter(module.exports, (_test, treeAdapter) => {
- _test['Plain text conversion stream'] = function() {
- const converter = new PlainTextConversionStream({ treeAdapter: treeAdapter });
+generateTestsForEachTreeAdapter('plain-text-conversion-stream', (treeAdapter) => {
+ it('Plain text conversion stream', () => {
+ const converter = new PlainTextConversionStream({ treeAdapter });
converter.write('Hey');
converter.write('\r\nyo');
converter.write('\u0000');
converter.end('');
- const result = parse5.serialize(converter.document, { treeAdapter: treeAdapter });
+ const result = serialize(converter.document, { treeAdapter });
assert.strictEqual(
result,
'<html><head></head><body><pre>\nHey\nyo\uFFFD</pre></body></html>'
);
- };
+ });
});
-exports['Regression - Plain text conversion stream - Should not accept binary input (GH-269)'] = () => {
- const stream = new PlainTextConversionStream();
- const buf = Buffer.from('test');
+describe('plain-text-conversion-stream', () => {
+ it('Should not accept binary input (GH-269)', () => {
+ const stream = new PlainTextConversionStream();
+ const buf = Buffer.from('test');
- assert.throws(() => stream.write(buf), TypeError);
-};
+ assert.throws(() => stream.write(buf), TypeError);
+ });
+});
diff --git a/packages/parse5-plain-text-conversion-stream/tsconfig.json b/packages/parse5-plain-text-conversion-stream/tsconfig.json
new file mode 100644
index 0000000000000000000000000000000000000000..ebf4d81b68956ff9387d6a4e366d3270c4e693d3
--- /dev/null
+++ b/packages/parse5-plain-text-conversion-stream/tsconfig.json
@@ -0,0 +1,9 @@
+{
+ "extends": "../../tsconfig.json",
+ "compilerOptions": {
+ "rootDir": "lib",
+ "outDir": "dist"
+ },
+ "include": ["**/*.ts"],
+ "exclude": ["**/*.test.ts", "dist", "test"]
+}
diff --git a/packages/parse5-sax-parser/README.md b/packages/parse5-sax-parser/README.md
index d695fafe7bad6acb0becd7e83f41bf3335e079cc..d59744a7667cdf9799d73f75a5e6225fc0754310 100644
--- a/packages/parse5-sax-parser/README.md
+++ b/packages/parse5-sax-parser/README.md
@@ -16,7 +16,7 @@
- 📖 Documentation 📖
+ 📖 Documentation 📖
---
@@ -30,5 +30,5 @@
- Version history
+ Changelog
diff --git a/packages/parse5-sax-parser/docs/index.md b/packages/parse5-sax-parser/docs/index.md
deleted file mode 100644
index a089a954ad96e0416fbb883d084cd9cefa7590e2..0000000000000000000000000000000000000000
--- a/packages/parse5-sax-parser/docs/index.md
+++ /dev/null
@@ -1,198 +0,0 @@
-# Class: SAXParser
-
-Streaming [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style HTML parser. A [transform stream](https://nodejs.org/api/stream.html#stream_class_stream_transform) (which means you can pipe _through_ it, see example).
-
-*__example__*:
-
-```js
- const SAXParser = require('parse5-sax-parser');
- const http = require('http');
- const fs = require('fs');
-
- const file = fs.createWriteStream('/home/google.com.html');
- const parser = new SAXParser();
-
- parser.on('text', text => {
- // Handle page text content
- ...
- });
-
- http.get('http://google.com', res => {
- // SAXParser is the Transform stream, which means you can pipe
- // through it. So, you can analyze page content and, e.g., save it
- // to the file at the same time:
- res.pipe(parser).pipe(file);
- });
-```
-
-### Constructors
-
-* [constructor](#constructor)
-
-### Methods
-
-* [stop](#stop)
-
-See also: [transform stream API](https://nodejs.org/api/stream.html#stream_class_stream_transform).
-
-### Events
-
-* [on("startTag")](#on_startag)
-* [on("endTag")](#on_startag)
-* [on("comment")](#on_comment)
-* [on("text")](#on_text)
-* [on("doctype")](#on_doctype)
-
-See also: [transform stream API](https://nodejs.org/api/stream.html#stream_class_stream_transform).
-
----
-
-## Constructors
-
-
-
-### constructor
-
-⊕ **new SAXParser**(options?: *[SAXParserOptions](sax-parser-options.md)*): [SAXParser]()
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| `Optional` options | [SAXParserOptions](sax-parser-options.md) | Parsing options. |
-
-**Returns:** [SAXParser]()
-
-___
-
-
-## Methods
-
-
-
-
-### stop
-
-▸ **stop**(): `void`
-
-Stops parsing. Useful if you want the parser to stop consuming CPU time once you've obtained the desired info from the input stream. Doesn't prevent piping, so that data will flow through the parser as usual.
-
-*__example__*:
-
-```js
-const SAXParser = require('parse5-sax-parser');
-const http = require('http');
-const fs = require('fs');
-
-const file = fs.createWriteStream('google.com.html');
-const parser = new SAXParser();
-
-parser.on('doctype', ({ name, publicId, systemId }) => {
- // Process doctype info and stop parsing
- ...
- parser.stop();
-});
-
-http.get('http://google.com', res => {
- // Despite the fact that parser.stop() was called whole
- // content of the page will be written to the file
- res.pipe(parser).pipe(file);
-});
-```
-
-**Returns:** `void`
-
-___
-
-## Events
-
-
-
-### on("startTag")
-
-▸ **on**(event: *"startTag"*, listener: *`function`*): `this`
-
-Raised when the parser encounters a start tag.
-
-**Parameters:**
-
-| Param | Type |
-| ------ | ------ |
-| event | "startTag" |
-| listener | function (startTag: *[StartTagToken](./tokens/start-tag.md)*) |
-
-**Returns:** `this`
-
-___
-
-
-### on("endTag")
-
-▸ **on**(event: *"endTag"*, listener: *`function`*): `this`
-
-Raised when parser encounters an end tag.
-
-**Parameters:**
-
-| Param | Type |
-| ------ | ------ |
-| event | "endTag" |
-| listener | function (endTag: *[EndTagToken](./tokens/end-tag.md)*) |
-
-**Returns:** `this`
-
-___
-
-
-### on("comment")
-
-▸ **on**(event: *"comment"*, listener: *`function`*): `this`
-
-Raised when parser encounters a comment.
-
-**Parameters:**
-
-| Param | Type |
-| ------ | ------ |
-| event | "comment" |
-| listener | function (comment: *[CommentToken](./tokens/comment.md)*) |
-
-**Returns:** `this`
-
-___
-
-
-### on("text")
-
-▸ **on**(event: *"text"*, listener: *`function`*): `this`
-
-Raised when parser encounters text content.
-
-**Parameters:**
-
-| Param | Type |
-| ------ | ------ |
-| event | "text" |
-| listener | function (text: *[TextToken](./tokens/text.md)*)|
-
-**Returns:** `this`
-
-___
-
-
-### on("doctype")
-
-▸ **on**(event: *"doctype"*, listener: *`function`*): `this`
-
-Raised when parser encounters a [document type declaration](https://en.wikipedia.org/wiki/Document_type_declaration).
-
-**Parameters:**
-
-| Param | Type |
-| ------ | ------ |
-| event | "doctype" |
-| listener | function (doctype: *[DoctypeToken](./tokens/doctype.md)*) |
-
-**Returns:** `this`
-
-___
diff --git a/packages/parse5-sax-parser/docs/sax-parser-options.md b/packages/parse5-sax-parser/docs/sax-parser-options.md
deleted file mode 100644
index 25afc413f1d2cbebd27fcce9af63b7191185f0c4..0000000000000000000000000000000000000000
--- a/packages/parse5-sax-parser/docs/sax-parser-options.md
+++ /dev/null
@@ -1,20 +0,0 @@
-# Interface: SAXParserOptions
-
-### Properties
-
-* [sourceCodeLocationInfo](#locationinfo)
-
----
-
-## Properties
-
-
-
-### `` sourceCodeLocationInfo
-
-**● sourceCodeLocationInfo**: *`boolean`*
-
-Enables source code location information for the tokens. When enabled, each token will have
-`sourceCodeLocation` property.
-
-___
diff --git a/packages/parse5-sax-parser/docs/tokens/comment.md b/packages/parse5-sax-parser/docs/tokens/comment.md
deleted file mode 100644
index bb0d56242b538f796325117af7353013e3451262..0000000000000000000000000000000000000000
--- a/packages/parse5-sax-parser/docs/tokens/comment.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# Interface: CommentToken
-
-### Properties
-
-* [text](#text)
-* [sourceCodeLocation](#sourcecodelocation)
-
----
-
-## Properties
-
-
-
-### text
-
-**● text**: *`string`*
-
-Comment text
-
-___
-
-
-
-### `` sourceCodeLocation
-
-**● sourceCodeLocation**: *[Location](../../../parse5/docs/source-code-location/location.md)*
-
-Comment source code location info. Available if location info is enabled via [SAXParserOptions](../sax-parser-options.md)
-
-___
diff --git a/packages/parse5-sax-parser/docs/tokens/doctype.md b/packages/parse5-sax-parser/docs/tokens/doctype.md
deleted file mode 100644
index 3a2df7f13d69eb476598315cb633ea6e6ffc88fe..0000000000000000000000000000000000000000
--- a/packages/parse5-sax-parser/docs/tokens/doctype.md
+++ /dev/null
@@ -1,51 +0,0 @@
-# Interface: DoctypeToken
-
-### Properties
-
-* [name](#name)
-* [publicId](#publicid)
-* [systemId](#systemid)
-* [sourceCodeLocation](#sourcecodelocation)
-
----
-
-## Properties
-
-
-
-### name
-
-**● name**: *`string`*
-
-Document type name.
-
-___
-
-
-### publicId
-
-**● publicId**: *`string`*
-
-Document type public identifier.
-
-___
-
-
-### systemId
-
-**● systemId**: *`string`*
-
-Document type system identifier.
-
-___
-
-
-
-
-### `` sourceCodeLocation
-
-**● sourceCodeLocation**: *[Location](../../../parse5/docs/source-code-location/location.md)*
-
-Text content source code location info. Available if location info is enabled via [SAXParserOptions](../sax-parser-options.md)
-
-___
diff --git a/packages/parse5-sax-parser/docs/tokens/end-tag.md b/packages/parse5-sax-parser/docs/tokens/end-tag.md
deleted file mode 100644
index 7c6d01230148ff301cf94a8444bc7bc9557174cd..0000000000000000000000000000000000000000
--- a/packages/parse5-sax-parser/docs/tokens/end-tag.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# Interface: EndTagToken
-
-### Properties
-
-* [sourceCodeLocation](#sourcecodelocation)
-* [tagName](#tagname)
-
----
-
-## Properties
-
-
-
-### tagName
-
-**● tagName**: *`string`*
-
-Tag name
-
-___
-
-
-
-### `` sourceCodeLocation
-
-**● sourceCodeLocation**: *[Location](../../../parse5/docs/source-code-location/location.md)*
-
-End tag source code location info. Available if location info is enabled via [SAXParserOptions](../sax-parser-options.md)
-
-___
diff --git a/packages/parse5-sax-parser/docs/tokens/start-tag.md b/packages/parse5-sax-parser/docs/tokens/start-tag.md
deleted file mode 100644
index fd42f461205510225fcf7924f28ff16ae4ec180f..0000000000000000000000000000000000000000
--- a/packages/parse5-sax-parser/docs/tokens/start-tag.md
+++ /dev/null
@@ -1,49 +0,0 @@
-# Interface: StartTagToken
-
-### Properties
-
-* [attrs](#attrs)
-* [selfClosing](#selfclosing)
-* [sourceCodeLocation](#sourcecodelocation)
-* [tagName](#tagname)
-
----
-
-## Properties
-
-
-
-### attrs
-
-**● attrs**: *Attribute*[]
-
-List of attributes
-
-___
-
-
-### tagName
-
-**● tagName**: *`string`*
-
-Tag name
-
-___
-
-
-### selfClosing
-
-**● selfClosing**: *`boolean`*
-
-Indicates if the tag is self-closing
-
-___
-
-
-### `` sourceCodeLocation
-
-**● sourceCodeLocation**: *[StartTagLocation](../../../parse5/docs/source-code-location/start-tag-location.md)*
-
-Start tag source code location info. Available if location info is enabled via [SAXParserOptions](../sax-parser-options.md)
-
-___
diff --git a/packages/parse5-sax-parser/docs/tokens/text.md b/packages/parse5-sax-parser/docs/tokens/text.md
deleted file mode 100644
index ea58eb9147b80e7e1f47c4b8145d2d1075d35c1b..0000000000000000000000000000000000000000
--- a/packages/parse5-sax-parser/docs/tokens/text.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# Interface: TextToken
-
-### Properties
-
-* [text](#text)
-* [sourceCodeLocation](#sourcecodelocation)
-
----
-
-## Properties
-
-
-
-### text
-
-**● text**: *`string`*
-
-Text content
-
-___
-
-
-
-### `` sourceCodeLocation
-
-**● sourceCodeLocation**: *[Location](../../../parse5/docs/source-code-location/location.md)*
-
-Text content source code location info. Available if location info is enabled via [SAXParserOptions](../sax-parser-options.md)
-
-___
diff --git a/packages/parse5-sax-parser/lib/dev-null-stream.js b/packages/parse5-sax-parser/lib/dev-null-stream.js
deleted file mode 100644
index 95661c4fc1a98b88271e5439f8a1ff6de925c5fc..0000000000000000000000000000000000000000
--- a/packages/parse5-sax-parser/lib/dev-null-stream.js
+++ /dev/null
@@ -1,11 +0,0 @@
-'use strict';
-
-const { Writable } = require('stream');
-
-class DevNullStream extends Writable {
- _write(chunk, encoding, cb) {
- cb();
- }
-}
-
-module.exports = DevNullStream;
diff --git a/packages/parse5-sax-parser/lib/dev-null-stream.ts b/packages/parse5-sax-parser/lib/dev-null-stream.ts
new file mode 100644
index 0000000000000000000000000000000000000000..c7345136c8306e403dfc5f2fd09de7884499ccbc
--- /dev/null
+++ b/packages/parse5-sax-parser/lib/dev-null-stream.ts
@@ -0,0 +1,7 @@
+import { Writable } from 'node:stream';
+
+export class DevNullStream extends Writable { // Writable whose `_write` ignores every chunk it receives.
+ override _write(_chunk: string, _encoding: string, cb: () => void): void { // `_chunk` and `_encoding` are intentionally unused.
+ cb(); // Invoke the callback right away; nothing is stored or forwarded.
+ }
+}
diff --git a/packages/parse5-sax-parser/lib/index.js b/packages/parse5-sax-parser/lib/index.js
deleted file mode 100644
index cbdd582920303b35774b11f7587ebab1b484b101..0000000000000000000000000000000000000000
--- a/packages/parse5-sax-parser/lib/index.js
+++ /dev/null
@@ -1,166 +0,0 @@
-'use strict';
-
-const { Transform } = require('stream');
-const Tokenizer = require('parse5/lib/tokenizer');
-const LocationInfoTokenizerMixin = require('parse5/lib/extensions/location-info/tokenizer-mixin');
-const Mixin = require('parse5/lib/utils/mixin');
-const mergeOptions = require('parse5/lib/utils/merge-options');
-const DevNullStream = require('./dev-null-stream');
-const ParserFeedbackSimulator = require('./parser-feedback-simulator');
-
-const DEFAULT_OPTIONS = {
- sourceCodeLocationInfo: false
-};
-
-class SAXParser extends Transform {
- constructor(options) {
- super({ encoding: 'utf8', decodeStrings: false });
-
- this.options = mergeOptions(DEFAULT_OPTIONS, options);
-
- this.tokenizer = new Tokenizer(options);
- this.locInfoMixin = null;
-
- if (this.options.sourceCodeLocationInfo) {
- this.locInfoMixin = Mixin.install(this.tokenizer, LocationInfoTokenizerMixin);
- }
-
- this.parserFeedbackSimulator = new ParserFeedbackSimulator(this.tokenizer);
-
- this.pendingText = null;
-
- this.lastChunkWritten = false;
- this.stopped = false;
-
- // NOTE: always pipe stream to the /dev/null stream to avoid
- // `highWaterMark` hit even if we don't have consumers.
- // (see: https://github.com/inikulin/parse5/issues/97#issuecomment-171940774)
- this.pipe(new DevNullStream());
- }
-
- //TransformStream implementation
- _transform(chunk, encoding, callback) {
- if (typeof chunk !== 'string') {
- throw new TypeError('Parser can work only with string streams.');
- }
-
- callback(null, this._transformChunk(chunk));
- }
-
- _final(callback) {
- this.lastChunkWritten = true;
- callback(null, this._transformChunk(''));
- }
-
- stop() {
- this.stopped = true;
- }
-
- //Internals
- _transformChunk(chunk) {
- if (!this.stopped) {
- this.tokenizer.write(chunk, this.lastChunkWritten);
- this._runParsingLoop();
- }
- return chunk;
- }
-
- _runParsingLoop() {
- let token = null;
-
- do {
- token = this.parserFeedbackSimulator.getNextToken();
-
- if (token.type === Tokenizer.HIBERNATION_TOKEN) {
- break;
- }
-
- if (
- token.type === Tokenizer.CHARACTER_TOKEN ||
- token.type === Tokenizer.WHITESPACE_CHARACTER_TOKEN ||
- token.type === Tokenizer.NULL_CHARACTER_TOKEN
- ) {
- if (this.pendingText === null) {
- token.type = Tokenizer.CHARACTER_TOKEN;
- this.pendingText = token;
- } else {
- this.pendingText.chars += token.chars;
-
- if (this.options.sourceCodeLocationInfo) {
- const { endLine, endCol, endOffset } = token.location;
- Object.assign(this.pendingText.location, {
- endLine,
- endCol,
- endOffset
- });
- }
- }
- } else {
- this._emitPendingText();
- this._handleToken(token);
- }
- } while (!this.stopped && token.type !== Tokenizer.EOF_TOKEN);
- }
-
- _handleToken(token) {
- if (token.type === Tokenizer.EOF_TOKEN) {
- return true;
- }
-
- const { eventName, reshapeToken } = TOKEN_EMISSION_HELPERS[token.type];
-
- if (this.listenerCount(eventName) === 0) {
- return false;
- }
-
- this._emitToken(eventName, reshapeToken(token));
-
- return true;
- }
-
- _emitToken(eventName, token) {
- this.emit(eventName, token);
- }
-
- _emitPendingText() {
- if (this.pendingText !== null) {
- this._handleToken(this.pendingText);
- this.pendingText = null;
- }
- }
-}
-
-const TOKEN_EMISSION_HELPERS = {
- [Tokenizer.START_TAG_TOKEN]: {
- eventName: 'startTag',
- reshapeToken: origToken => ({
- tagName: origToken.tagName,
- attrs: origToken.attrs,
- selfClosing: origToken.selfClosing,
- sourceCodeLocation: origToken.location
- })
- },
- [Tokenizer.END_TAG_TOKEN]: {
- eventName: 'endTag',
- reshapeToken: origToken => ({ tagName: origToken.tagName, sourceCodeLocation: origToken.location })
- },
- [Tokenizer.COMMENT_TOKEN]: {
- eventName: 'comment',
- reshapeToken: origToken => ({ text: origToken.data, sourceCodeLocation: origToken.location })
- },
- [Tokenizer.DOCTYPE_TOKEN]: {
- eventName: 'doctype',
- reshapeToken: origToken => ({
- name: origToken.name,
- publicId: origToken.publicId,
- systemId: origToken.systemId,
- sourceCodeLocation: origToken.location
- })
- },
- [Tokenizer.CHARACTER_TOKEN]: {
- eventName: 'text',
- reshapeToken: origToken => ({ text: origToken.chars, sourceCodeLocation: origToken.location })
- }
-};
-
-module.exports = SAXParser;
diff --git a/packages/parse5-sax-parser/lib/index.ts b/packages/parse5-sax-parser/lib/index.ts
new file mode 100644
index 0000000000000000000000000000000000000000..8b51209ff35129e17290a992f778369e92b4e548
--- /dev/null
+++ b/packages/parse5-sax-parser/lib/index.ts
@@ -0,0 +1,294 @@
+import { Transform } from 'node:stream';
+import type { Tokenizer, TokenHandler, Token } from 'parse5';
+import { DevNullStream } from './dev-null-stream.js';
+import { ParserFeedbackSimulator } from './parser-feedback-simulator.js';
+
+export interface SAXParserOptions {
+ /**
+ * Enables source code location information for tokens.
+ *
+ * When enabled, each token will have a `sourceCodeLocation` property.
+ */
+ sourceCodeLocationInfo?: boolean;
+}
+
+/**
+ * Streaming [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style HTML parser.
+ * A [transform stream](https://nodejs.org/api/stream.html#stream_class_stream_transform) (which means you can pipe _through_ it, see example).
+ *
+ * @example
+ *
+ * ```js
+ * import { SAXParser } from 'parse5-sax-parser';
+ * import http from 'node:http';
+ * import fs from 'node:fs';
+ *
+ * const file = fs.createWriteStream('/home/google.com.html');
+ * const parser = new SAXParser();
+ *
+ * parser.on('text', text => {
+ * // Handle page text content
+ * ...
+ * });
+ *
+ * http.get('http://google.com', res => {
+ * // `SAXParser` is the `Transform` stream, which means you can pipe
+ * // through it. So, you can analyze the page content and, e.g., save it
+ * // to the file at the same time:
+ * res.pipe(parser).pipe(file);
+ * });
+ * ```
+ */
+export class SAXParser extends Transform implements TokenHandler {
+ protected options: SAXParserOptions;
+ protected parserFeedbackSimulator: ParserFeedbackSimulator;
+ private pendingText: Text | null = null;
+ private lastChunkWritten = false;
+ private stopped = false;
+ protected tokenizer: Tokenizer;
+
+ /**
+ * @param options Parsing options.
+ */
+ constructor(options: SAXParserOptions = {}) {
+ super({ encoding: 'utf8', decodeStrings: false });
+
+ this.options = {
+ sourceCodeLocationInfo: false,
+ ...options,
+ };
+
+ this.parserFeedbackSimulator = new ParserFeedbackSimulator(this.options, this);
+ this.tokenizer = this.parserFeedbackSimulator.tokenizer;
+
+ // NOTE: always pipe the stream to the /dev/null stream to avoid
+ // the `highWaterMark` to be hit even if we don't have consumers.
+ // (see: https://github.com/inikulin/parse5/issues/97#issuecomment-171940774)
+ this.pipe(new DevNullStream());
+ }
+
+ //`Transform` implementation
+ override _transform(
+ chunk: string,
+ _encoding: string,
+ callback: (error?: Error | null, data?: string) => void
+ ): void {
+ if (typeof chunk !== 'string') {
+ throw new TypeError('Parser can work only with string streams.');
+ }
+
+ callback(null, this._transformChunk(chunk));
+ }
+
+ override _final(callback: (error?: Error | null, data?: string) => void): void {
+ this.lastChunkWritten = true;
+ callback(null, this._transformChunk(''));
+ }
+
+ /**
+ * Stops parsing. Useful if you want the parser to stop consuming CPU time
+ * once you've obtained the desired info from the input stream. Doesn't
+ * prevent piping, so that data will flow through the parser as usual.
+ *
+ * @example
+ *
+ * ```js
+ * import { SAXParser } from 'parse5-sax-parser';
+ * import http from 'node:http';
+ * import fs from 'node:fs';
+ *
+ * const file = fs.createWriteStream('google.com.html');
+ * const parser = new SAXParser();
+ *
+ * parser.on('doctype', ({ name, publicId, systemId }) => {
+ * // Process doctype info and stop parsing
+ * ...
+ * parser.stop();
+ * });
+ *
+ * http.get('http://google.com', res => {
+ * // Despite the fact that parser.stop() was called whole
+ * // content of the page will be written to the file
+ * res.pipe(parser).pipe(file);
+ * });
+ * ```
+ */
+ public stop(): void {
+ this.stopped = true;
+ this.tokenizer.pause();
+ }
+
+ //Internals
+ protected _transformChunk(chunk: string): string {
+ if (!this.stopped) {
+ this.tokenizer.write(chunk, this.lastChunkWritten);
+ }
+ return chunk;
+ }
+
+ /** @internal */
+ onCharacter({ chars, location }: Token.CharacterToken): void {
+ if (this.pendingText === null) {
+ this.pendingText = { text: chars, sourceCodeLocation: location };
+ } else {
+ this.pendingText.text += chars;
+
+ if (location && this.pendingText.sourceCodeLocation) {
+ const { endLine, endCol, endOffset } = location;
+ this.pendingText.sourceCodeLocation = {
+ ...this.pendingText.sourceCodeLocation,
+ endLine,
+ endCol,
+ endOffset,
+ };
+ }
+ }
+
+ if (this.tokenizer.preprocessor.willDropParsedChunk()) {
+ this._emitPendingText();
+ }
+ }
+
+ /** @internal */
+ onWhitespaceCharacter(token: Token.CharacterToken): void {
+ this.onCharacter(token);
+ }
+
+ /** @internal */
+ onNullCharacter(token: Token.CharacterToken): void {
+ this.onCharacter(token);
+ }
+
+ /** @internal */
+ onEof(): void {
+ this._emitPendingText();
+ this.stopped = true;
+ }
+
+ /** @internal */
+ onStartTag(token: Token.TagToken): void {
+ this._emitPendingText();
+
+ const startTag: StartTag = {
+ tagName: token.tagName,
+ attrs: token.attrs,
+ selfClosing: token.selfClosing,
+ sourceCodeLocation: token.location,
+ };
+ this.emitIfListenerExists('startTag', startTag);
+ }
+
+ /** @internal */
+ onEndTag(token: Token.TagToken): void {
+ this._emitPendingText();
+
+ const endTag: EndTag = {
+ tagName: token.tagName,
+ sourceCodeLocation: token.location,
+ };
+ this.emitIfListenerExists('endTag', endTag);
+ }
+
+ /** @internal */
+ onDoctype(token: Token.DoctypeToken): void {
+ this._emitPendingText();
+
+ const doctype: Doctype = {
+ name: token.name,
+ publicId: token.publicId,
+ systemId: token.systemId,
+ sourceCodeLocation: token.location,
+ };
+ this.emitIfListenerExists('doctype', doctype);
+ }
+
+ /** @internal */
+ onComment(token: Token.CommentToken): void {
+ this._emitPendingText();
+
+ const comment: Comment = {
+ text: token.data,
+ sourceCodeLocation: token.location,
+ };
+ this.emitIfListenerExists('comment', comment);
+ }
+
+ protected emitIfListenerExists(eventName: string, token: SaxToken): boolean {
+ if (this.listenerCount(eventName) === 0) {
+ return false;
+ }
+
+ this._emitToken(eventName, token);
+
+ return true;
+ }
+
+ protected _emitToken(eventName: string, token: SaxToken): void {
+ this.emit(eventName, token);
+ }
+
+ private _emitPendingText(): void {
+ if (this.pendingText !== null) {
+ this.emitIfListenerExists('text', this.pendingText);
+ this.pendingText = null;
+ }
+ }
+}
+
+export interface SaxToken {
+ /** Source code location info. Available if location info is enabled via {@link SAXParserOptions}. */
+ sourceCodeLocation?: Token.Location | null;
+}
+
+export interface StartTag extends SaxToken {
+ /** Tag name */
+ tagName: string;
+ /** List of attributes */
+ attrs: Token.Attribute[];
+ /** Indicates if the tag is self-closing */
+ selfClosing: boolean;
+}
+
+export interface EndTag extends SaxToken {
+ /** Tag name */
+ tagName: string;
+}
+
+export interface Text extends SaxToken {
+ /** Text content. */
+ text: string;
+}
+
+export interface Comment extends SaxToken {
+ /** Comment text. */
+ text: string;
+}
+
+export interface Doctype extends SaxToken {
+ /** Document type name. */
+ name: string | null;
+ /** Document type public identifier. */
+ publicId: string | null;
+ /** Document type system identifier. */
+ systemId: string | null;
+}
+
+export interface SAXParser {
+ /** Raised when the parser encounters a start tag. */
+ on(event: 'startTag', listener: (startTag: StartTag) => void): this;
+ /** Raised when the parser encounters an end tag. */
+ on(event: 'endTag', listener: (endTag: EndTag) => void): this;
+ /** Raised when the parser encounters a comment. */
+ on(event: 'comment', listener: (comment: Comment) => void): this;
+ /** Raised when the parser encounters text content. */
+ on(event: 'text', listener: (text: Text) => void): this;
+ /** Raised when the parser encounters a [document type declaration](https://en.wikipedia.org/wiki/Document_type_declaration) */
+ on(event: 'doctype', listener: (doctype: Doctype) => void): this;
+ /**
+ * Base event handler.
+ *
+ * @param event Name of the event
+ * @param handler Event handler
+ */
+ on(event: string, handler: (...args: any[]) => void): this;
+}
diff --git a/packages/parse5-sax-parser/lib/parser-feedback-simulator.js b/packages/parse5-sax-parser/lib/parser-feedback-simulator.js
deleted file mode 100644
index 3f3a29de33a2c1ff975b0656cb6da1dd62652ecc..0000000000000000000000000000000000000000
--- a/packages/parse5-sax-parser/lib/parser-feedback-simulator.js
+++ /dev/null
@@ -1,159 +0,0 @@
-'use strict';
-
-const Tokenizer = require('parse5/lib/tokenizer');
-const foreignContent = require('parse5/lib/common/foreign-content');
-const unicode = require('parse5/lib/common/unicode');
-const HTML = require('parse5/lib/common/html');
-
-//Aliases
-const $ = HTML.TAG_NAMES;
-const NS = HTML.NAMESPACES;
-
-//ParserFeedbackSimulator
-//Simulates adjustment of the Tokenizer which performed by standard parser during tree construction.
-class ParserFeedbackSimulator {
- constructor(tokenizer) {
- this.tokenizer = tokenizer;
-
- this.namespaceStack = [];
- this.namespaceStackTop = -1;
- this._enterNamespace(NS.HTML);
- }
-
- getNextToken() {
- const token = this.tokenizer.getNextToken();
-
- if (token.type === Tokenizer.START_TAG_TOKEN) {
- this._handleStartTagToken(token);
- } else if (token.type === Tokenizer.END_TAG_TOKEN) {
- this._handleEndTagToken(token);
- } else if (token.type === Tokenizer.NULL_CHARACTER_TOKEN && this.inForeignContent) {
- token.type = Tokenizer.CHARACTER_TOKEN;
- token.chars = unicode.REPLACEMENT_CHARACTER;
- } else if (this.skipNextNewLine) {
- if (token.type !== Tokenizer.HIBERNATION_TOKEN) {
- this.skipNextNewLine = false;
- }
-
- if (token.type === Tokenizer.WHITESPACE_CHARACTER_TOKEN && token.chars[0] === '\n') {
- if (token.chars.length === 1) {
- return this.getNextToken();
- }
-
- token.chars = token.chars.substr(1);
- }
- }
-
- return token;
- }
-
- //Namespace stack mutations
- _enterNamespace(namespace) {
- this.namespaceStackTop++;
- this.namespaceStack.push(namespace);
-
- this.inForeignContent = namespace !== NS.HTML;
- this.currentNamespace = namespace;
- this.tokenizer.allowCDATA = this.inForeignContent;
- }
-
- _leaveCurrentNamespace() {
- this.namespaceStackTop--;
- this.namespaceStack.pop();
-
- this.currentNamespace = this.namespaceStack[this.namespaceStackTop];
- this.inForeignContent = this.currentNamespace !== NS.HTML;
- this.tokenizer.allowCDATA = this.inForeignContent;
- }
-
- //Token handlers
- _ensureTokenizerMode(tn) {
- if (tn === $.TEXTAREA || tn === $.TITLE) {
- this.tokenizer.state = Tokenizer.MODE.RCDATA;
- } else if (tn === $.PLAINTEXT) {
- this.tokenizer.state = Tokenizer.MODE.PLAINTEXT;
- } else if (tn === $.SCRIPT) {
- this.tokenizer.state = Tokenizer.MODE.SCRIPT_DATA;
- } else if (
- tn === $.STYLE ||
- tn === $.IFRAME ||
- tn === $.XMP ||
- tn === $.NOEMBED ||
- tn === $.NOFRAMES ||
- tn === $.NOSCRIPT
- ) {
- this.tokenizer.state = Tokenizer.MODE.RAWTEXT;
- }
- }
-
- _handleStartTagToken(token) {
- let tn = token.tagName;
-
- if (tn === $.SVG) {
- this._enterNamespace(NS.SVG);
- } else if (tn === $.MATH) {
- this._enterNamespace(NS.MATHML);
- }
-
- if (this.inForeignContent) {
- if (foreignContent.causesExit(token)) {
- this._leaveCurrentNamespace();
- return;
- }
-
- const currentNs = this.currentNamespace;
-
- if (currentNs === NS.MATHML) {
- foreignContent.adjustTokenMathMLAttrs(token);
- } else if (currentNs === NS.SVG) {
- foreignContent.adjustTokenSVGTagName(token);
- foreignContent.adjustTokenSVGAttrs(token);
- }
-
- foreignContent.adjustTokenXMLAttrs(token);
-
- tn = token.tagName;
-
- if (!token.selfClosing && foreignContent.isIntegrationPoint(tn, currentNs, token.attrs)) {
- this._enterNamespace(NS.HTML);
- }
- } else {
- if (tn === $.PRE || tn === $.TEXTAREA || tn === $.LISTING) {
- this.skipNextNewLine = true;
- } else if (tn === $.IMAGE) {
- token.tagName = $.IMG;
- }
-
- this._ensureTokenizerMode(tn);
- }
- }
-
- _handleEndTagToken(token) {
- let tn = token.tagName;
-
- if (!this.inForeignContent) {
- const previousNs = this.namespaceStack[this.namespaceStackTop - 1];
-
- if (previousNs === NS.SVG && foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP[tn]) {
- tn = foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP[tn];
- }
-
- //NOTE: check for exit from integration point
- if (foreignContent.isIntegrationPoint(tn, previousNs, token.attrs)) {
- this._leaveCurrentNamespace();
- }
- } else if (
- (tn === $.SVG && this.currentNamespace === NS.SVG) ||
- (tn === $.MATH && this.currentNamespace === NS.MATHML)
- ) {
- this._leaveCurrentNamespace();
- }
-
- // NOTE: adjust end tag name as well for consistency
- if (this.currentNamespace === NS.SVG) {
- foreignContent.adjustTokenSVGTagName(token);
- }
- }
-}
-
-module.exports = ParserFeedbackSimulator;
diff --git a/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts b/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts
new file mode 100644
index 0000000000000000000000000000000000000000..116637dd13050eee74a415d4a37301f353cf6943
--- /dev/null
+++ b/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts
@@ -0,0 +1,221 @@
+import {
+ Tokenizer,
+ type TokenizerOptions,
+ TokenizerMode,
+ type TokenHandler,
+ Token,
+ foreignContent,
+ html,
+} from 'parse5';
+
+const $ = html.TAG_ID;
+
+const REPLACEMENT_CHARACTER = '\uFFFD';
+const LINE_FEED_CODE_POINT = 0x0a;
+
+/**
+ * Simulates adjustments of the Tokenizer which are performed by the standard parser during tree construction.
+ */
+export class ParserFeedbackSimulator implements TokenHandler {
+ private namespaceStack: html.NS[] = [];
+ public inForeignContent = false;
+ public skipNextNewLine = false;
+ public tokenizer: Tokenizer;
+
+ constructor(options: TokenizerOptions, private handler: TokenHandler) {
+ this.tokenizer = new Tokenizer(options, this);
+ this._enterNamespace(html.NS.HTML);
+ }
+
+ /** @internal */
+ onNullCharacter(token: Token.CharacterToken): void {
+ this.skipNextNewLine = false;
+
+ if (this.inForeignContent) {
+ this.handler.onCharacter({
+ type: Token.TokenType.CHARACTER,
+ chars: REPLACEMENT_CHARACTER,
+ location: token.location,
+ });
+ } else {
+ this.handler.onNullCharacter(token);
+ }
+ }
+
+    /** @internal */
+    onWhitespaceCharacter(token: Token.CharacterToken): void {
+        // Only the token right after the start tag may lose its newline, so always disarm the flag here.
+        const dropLeadingLineFeed = this.skipNextNewLine && token.chars.charCodeAt(0) === LINE_FEED_CODE_POINT;
+        this.skipNextNewLine = false;
+
+        if (dropLeadingLineFeed) {
+            if (token.chars.length === 1) {
+                return;
+            }
+            token.chars = token.chars.slice(1);
+        }
+        this.handler.onWhitespaceCharacter(token);
+    }
+
+ /** @internal */
+ onCharacter(token: Token.CharacterToken): void {
+ this.skipNextNewLine = false;
+ this.handler.onCharacter(token);
+ }
+
+ /** @internal */
+ onComment(token: Token.CommentToken): void {
+ this.skipNextNewLine = false;
+ this.handler.onComment(token);
+ }
+
+ /** @internal */
+ onDoctype(token: Token.DoctypeToken): void {
+ this.skipNextNewLine = false;
+ this.handler.onDoctype(token);
+ }
+
+ /** @internal */
+ onEof(token: Token.EOFToken): void {
+ this.skipNextNewLine = false;
+ this.handler.onEof(token);
+ }
+
+ //Namespace stack mutations
+ private _enterNamespace(namespace: html.NS): void {
+ this.namespaceStack.unshift(namespace);
+ this.inForeignContent = namespace !== html.NS.HTML;
+ this.tokenizer.inForeignNode = this.inForeignContent;
+ }
+
+ private _leaveCurrentNamespace(): void {
+ this.namespaceStack.shift();
+ this.inForeignContent = this.namespaceStack[0] !== html.NS.HTML;
+ this.tokenizer.inForeignNode = this.inForeignContent;
+ }
+
+ //Token handlers
+ private _ensureTokenizerMode(tn: html.TAG_ID): void {
+ switch (tn) {
+ case $.TEXTAREA:
+ case $.TITLE: {
+ this.tokenizer.state = TokenizerMode.RCDATA;
+ break;
+ }
+ case $.PLAINTEXT: {
+ this.tokenizer.state = TokenizerMode.PLAINTEXT;
+ break;
+ }
+ case $.SCRIPT: {
+ this.tokenizer.state = TokenizerMode.SCRIPT_DATA;
+ break;
+ }
+ case $.STYLE:
+ case $.IFRAME:
+ case $.XMP:
+ case $.NOEMBED:
+ case $.NOFRAMES:
+ case $.NOSCRIPT: {
+ this.tokenizer.state = TokenizerMode.RAWTEXT;
+ break;
+ }
+ default:
+ // Do nothing
+ }
+ }
+
+ /** @internal */
+ onStartTag(token: Token.TagToken): void {
+ let tn = token.tagID;
+
+ switch (tn) {
+ case $.SVG: {
+ this._enterNamespace(html.NS.SVG);
+ break;
+ }
+ case $.MATH: {
+ this._enterNamespace(html.NS.MATHML);
+ break;
+ }
+ default:
+ // Do nothing
+ }
+
+ if (this.inForeignContent) {
+ if (foreignContent.causesExit(token)) {
+ this._leaveCurrentNamespace();
+ } else {
+ const currentNs = this.namespaceStack[0];
+
+ if (currentNs === html.NS.MATHML) {
+ foreignContent.adjustTokenMathMLAttrs(token);
+ } else if (currentNs === html.NS.SVG) {
+ foreignContent.adjustTokenSVGTagName(token);
+ foreignContent.adjustTokenSVGAttrs(token);
+ }
+
+ foreignContent.adjustTokenXMLAttrs(token);
+
+ tn = token.tagID;
+
+ if (!token.selfClosing && foreignContent.isIntegrationPoint(tn, currentNs, token.attrs)) {
+ this._enterNamespace(html.NS.HTML);
+ }
+ }
+ } else {
+ switch (tn) {
+ case $.PRE:
+ case $.TEXTAREA:
+ case $.LISTING: {
+ this.skipNextNewLine = true;
+ break;
+ }
+ case $.IMAGE: {
+ token.tagName = html.TAG_NAMES.IMG;
+ token.tagID = $.IMG;
+ break;
+ }
+ default:
+ // Do nothing
+ }
+
+ this._ensureTokenizerMode(tn);
+ }
+
+ this.handler.onStartTag(token);
+ }
+
+ /** @internal */
+ onEndTag(token: Token.TagToken): void {
+ let tn = token.tagID;
+
+ if (!this.inForeignContent) {
+ const previousNs = this.namespaceStack[1];
+
+ if (previousNs === html.NS.SVG) {
+ const adjustedTagName = foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP.get(token.tagName);
+
+ if (adjustedTagName) {
+ tn = html.getTagID(adjustedTagName);
+ }
+ }
+
+ //NOTE: check for exit from integration point
+ if (foreignContent.isIntegrationPoint(tn, previousNs, token.attrs)) {
+ this._leaveCurrentNamespace();
+ }
+ } else if (
+ (tn === $.SVG && this.namespaceStack[0] === html.NS.SVG) ||
+ (tn === $.MATH && this.namespaceStack[0] === html.NS.MATHML)
+ ) {
+ this._leaveCurrentNamespace();
+ }
+
+ // NOTE: adjust end tag name as well for consistency
+ if (this.namespaceStack[0] === html.NS.SVG) {
+ foreignContent.adjustTokenSVGTagName(token);
+ }
+
+ this.handler.onEndTag(token);
+ }
+}
diff --git a/packages/parse5-sax-parser/package.json b/packages/parse5-sax-parser/package.json
index 09241523687545fea354d0f304bcdba947780e45..280c17880c2f2cf63c1353f33ba57b17dcf5f57f 100644
--- a/packages/parse5-sax-parser/package.json
+++ b/packages/parse5-sax-parser/package.json
@@ -1,10 +1,12 @@
{
"name": "parse5-sax-parser",
+ "type": "module",
"description": "Streaming SAX-style HTML parser.",
- "version": "6.0.1",
+ "version": "7.0.0",
"author": "Ivan Nikulin (https://github.com/inikulin)",
"contributors": "https://github.com/inikulin/parse5/graphs/contributors",
"homepage": "https://github.com/inikulin/parse5",
+ "funding": "https://github.com/inikulin/parse5?sponsor=1",
"keywords": [
"parse5",
"parser",
@@ -13,15 +15,19 @@
"SAX"
],
"license": "MIT",
- "main": "./lib/index.js",
+ "main": "dist/index.js",
+ "module": "dist/index.js",
+ "types": "dist/index.d.ts",
+ "exports": "./dist/index.js",
"dependencies": {
- "parse5": "^6.0.1"
+ "parse5": "^7.0.0"
},
"repository": {
"type": "git",
"url": "git://github.com/inikulin/parse5.git"
},
"files": [
- "lib"
+ "dist",
+ "!*.map"
]
}
diff --git a/packages/parse5-sax-parser/test/location-info.test.js b/packages/parse5-sax-parser/test/location-info.test.js
deleted file mode 100644
index bba6cbab582f360fe9169d7d4418955dc812794c..0000000000000000000000000000000000000000
--- a/packages/parse5-sax-parser/test/location-info.test.js
+++ /dev/null
@@ -1,48 +0,0 @@
-'use strict';
-
-const assert = require('assert');
-const SAXParser = require('../lib');
-const loadSAXParserTestData = require('../../../test/utils/load-sax-parser-test-data');
-const { writeChunkedToStream } = require('../../../test/utils/common');
-
-exports['Location info (SAX)'] = function() {
- loadSAXParserTestData().forEach(test => {
- //NOTE: we've already tested the correctness of the location info with the Tokenizer tests.
- //So here we just check that SAXParser provides this info in the handlers.
- const parser = new SAXParser({ sourceCodeLocationInfo: true });
-
- const handler = ({ sourceCodeLocation }) => {
- assert.strictEqual(typeof sourceCodeLocation.startLine, 'number');
- assert.strictEqual(typeof sourceCodeLocation.startCol, 'number');
- assert.strictEqual(typeof sourceCodeLocation.startOffset, 'number');
- assert.strictEqual(typeof sourceCodeLocation.endOffset, 'number');
- assert.ok(sourceCodeLocation.startOffset < sourceCodeLocation.endOffset);
- };
-
- parser.on('startTag', handler);
- parser.on('endTag', handler);
- parser.on('doctype', handler);
- parser.on('comment', handler);
- parser.on('text', handler);
-
- writeChunkedToStream(test.src, parser);
- });
-};
-
-exports['Regression - location info for text (GH-153, GH-266)'] = function() {
- const html = 'Here is a title';
- const parser = new SAXParser({ sourceCodeLocationInfo: true });
-
- parser.on('text', ({ sourceCodeLocation }) => {
- assert.deepStrictEqual(sourceCodeLocation, {
- startLine: 1,
- startCol: 35,
- startOffset: 34,
- endLine: 1,
- endCol: 50,
- endOffset: 49
- });
- });
-
- parser.end(html);
-};
diff --git a/packages/parse5-sax-parser/test/location-info.test.ts b/packages/parse5-sax-parser/test/location-info.test.ts
new file mode 100644
index 0000000000000000000000000000000000000000..dba662dc63462d2f3142e75a9d8f7a9bf062a063
--- /dev/null
+++ b/packages/parse5-sax-parser/test/location-info.test.ts
@@ -0,0 +1,49 @@
+import * as assert from 'node:assert';
+import { SAXParser } from '../lib/index.js';
+import { loadSAXParserTestData } from 'parse5-test-utils/utils/load-sax-parser-test-data.js';
+import { writeChunkedToStream } from 'parse5-test-utils/utils/common.js';
+import type { Token } from 'parse5';
+
+function assertLocation({ sourceCodeLocation }: { sourceCodeLocation: Token.Location }): void {
+ assert.strictEqual(typeof sourceCodeLocation.startLine, 'number');
+ assert.strictEqual(typeof sourceCodeLocation.startCol, 'number');
+ assert.strictEqual(typeof sourceCodeLocation.startOffset, 'number');
+ assert.strictEqual(typeof sourceCodeLocation.endOffset, 'number');
+ assert.ok(sourceCodeLocation.startOffset < sourceCodeLocation.endOffset);
+}
+
+describe('location-info', () => {
+ it('Location info (SAX)', () => {
+ for (const test of loadSAXParserTestData()) {
+ //NOTE: we've already tested the correctness of the location info with the Tokenizer tests.
+ //So here we just check that SAXParser provides this info in the handlers.
+ const parser = new SAXParser({ sourceCodeLocationInfo: true });
+
+ parser.on('startTag', assertLocation);
+ parser.on('endTag', assertLocation);
+ parser.on('doctype', assertLocation);
+ parser.on('comment', assertLocation);
+ parser.on('text', assertLocation);
+
+ writeChunkedToStream(test.src, parser);
+ }
+ });
+
+    it('Regression - location info for text (GH-153, GH-266)', () => {
+        const html = '<!DOCTYPE html><html><head><title>Here is a title</title></html>';
+        const parser = new SAXParser({ sourceCodeLocationInfo: true });
+
+        parser.on('text', ({ sourceCodeLocation }) => {
+            assert.deepStrictEqual(sourceCodeLocation, {
+                startLine: 1,
+                startCol: 35,
+                startOffset: 34, // 34 characters of markup precede the title text
+                endLine: 1,
+                endCol: 50,
+                endOffset: 49, // 34 + 'Here is a title'.length
+            });
+        });
+
+        parser.end(html);
+    });
+});
diff --git a/packages/parse5-sax-parser/test/parser-feedback-simulator.test.js b/packages/parse5-sax-parser/test/parser-feedback-simulator.test.js
deleted file mode 100644
index 44be6eae15015bd8e7af969373072783401f3038..0000000000000000000000000000000000000000
--- a/packages/parse5-sax-parser/test/parser-feedback-simulator.test.js
+++ /dev/null
@@ -1,16 +0,0 @@
-const path = require('path');
-const Tokenizer = require('parse5/lib/tokenizer');
-const generateTokenizationTests = require('../../../test/utils/generate-tokenization-tests');
-const ParserFeedbackSimulator = require('../lib/parser-feedback-simulator');
-
-generateTokenizationTests(
- exports,
- 'ParserFeedbackSimulator',
- path.join(__dirname, '../../../test/data/parser-feedback'),
- () => {
- const tokenizer = new Tokenizer();
- const feedbackSimulator = new ParserFeedbackSimulator(tokenizer);
-
- return { tokenizer, getNextToken: () => feedbackSimulator.getNextToken() };
- }
-);
diff --git a/packages/parse5-sax-parser/test/parser-feedback-simulator.test.ts b/packages/parse5-sax-parser/test/parser-feedback-simulator.test.ts
new file mode 100644
index 0000000000000000000000000000000000000000..9b43f3d0c6595c7ec55413ca848d797d20cec3e0
--- /dev/null
+++ b/packages/parse5-sax-parser/test/parser-feedback-simulator.test.ts
@@ -0,0 +1,10 @@
+import { fileURLToPath } from 'node:url';
+import { generateTokenizationTests } from 'parse5-test-utils/utils/generate-tokenization-tests.js';
+import { ParserFeedbackSimulator } from '../lib/parser-feedback-simulator.js';
+
+// `URL#pathname` stays percent-encoded and is not a valid path on Windows; convert the file URL properly.
+generateTokenizationTests(
+    'ParserFeedbackSimulator',
+    fileURLToPath(new URL('../../../test/data/parser-feedback', import.meta.url)),
+    (handler) => new ParserFeedbackSimulator({}, handler).tokenizer
+);
diff --git a/packages/parse5-sax-parser/test/sax-parser.test.js b/packages/parse5-sax-parser/test/sax-parser.test.js
deleted file mode 100644
index b06bf1127f80c70eda1f18afc300f4fcab49467e..0000000000000000000000000000000000000000
--- a/packages/parse5-sax-parser/test/sax-parser.test.js
+++ /dev/null
@@ -1,151 +0,0 @@
-'use strict';
-
-const assert = require('assert');
-const fs = require('fs');
-const path = require('path');
-const SAXParser = require('../lib');
-const loadSAXParserTestData = require('../../../test/utils/load-sax-parser-test-data');
-const {
- getStringDiffMsg,
- writeChunkedToStream,
- removeNewLines,
- WritableStreamStub
-} = require('../../../test/utils/common');
-
-function sanitizeForComparison(str) {
- return removeNewLines(str)
- .replace(/\s/g, '')
- .replace(/'/g, '"')
- .toLowerCase();
-}
-
-function createBasicTest(html, expected, options) {
- return function() {
- //NOTE: the idea of the test is to serialize back given HTML using SAXParser handlers
- let actual = '';
- const parser = new SAXParser(options);
-
- parser.on('doctype', ({ name, publicId, systemId }) => {
- actual += '';
- });
-
- parser.on('startTag', ({ tagName, attrs, selfClosing }) => {
- actual += '<' + tagName;
-
- if (attrs.length) {
- for (let i = 0; i < attrs.length; i++) {
- actual += ' ' + attrs[i].name + '="' + attrs[i].value + '"';
- }
- }
-
- actual += selfClosing ? '/>' : '>';
- });
-
- parser.on('endTag', ({ tagName }) => {
- actual += '' + tagName + '>';
- });
-
- parser.on('text', ({ text }) => {
- actual += text;
- });
-
- parser.on('comment', ({ text }) => {
- actual += '';
- });
-
- parser.once('finish', () => {
- expected = sanitizeForComparison(expected);
- actual = sanitizeForComparison(actual);
-
- //NOTE: use ok assertion, so output will not be polluted by the whole content of the strings
- assert.ok(actual === expected, getStringDiffMsg(actual, expected));
- });
-
- writeChunkedToStream(html, parser);
- };
-}
-
-//Basic tests
-loadSAXParserTestData().forEach(
- (test, idx) => (exports[`SAX - ${idx + 1}.${test.name}`] = createBasicTest(test.src, test.expected, test.options))
-);
-
-exports['SAX - Piping and .stop()'] = function(done) {
- const parser = new SAXParser();
- const writable = new WritableStreamStub();
- let handlerCallCount = 0;
-
- const handler = function() {
- handlerCallCount++;
-
- if (handlerCallCount === 10) {
- parser.stop();
- }
- };
-
- fs.createReadStream(path.join(__dirname, '../../../test/data/huge-page/huge-page.html'), 'utf8')
- .pipe(parser)
- .pipe(writable);
-
- parser.on('startTag', handler);
- parser.on('endTag', handler);
- parser.on('doctype', handler);
- parser.on('comment', handler);
- parser.on('text', handler);
-
- writable.once('finish', () => {
- const expected = fs
- .readFileSync(path.join(__dirname, '../../../test/data/huge-page/huge-page.html'))
- .toString();
-
- assert.strictEqual(handlerCallCount, 10);
- assert.strictEqual(writable.writtenData, expected);
- done();
- });
-};
-
-exports['Regression - SAX - Parser silently exits on big files (GH-97)'] = function(done) {
- const parser = new SAXParser();
-
- fs.createReadStream(path.join(__dirname, '../../../test/data/huge-page/huge-page.html'), 'utf8').pipe(parser);
-
- //NOTE: This is a smoke test - in case of regression it will fail with timeout.
- parser.once('finish', done);
-};
-
-exports['Regression - SAX - Last text chunk must be flushed (GH-271)'] = done => {
- const parser = new SAXParser();
- let foundText = false;
-
- parser.on('text', ({ text }) => {
- foundText = true;
- assert.strictEqual(text, 'text');
- });
-
- parser.once('finish', () => {
- assert.ok(foundText);
- done();
- });
-
- parser.write('text');
- parser.end();
-};
-
-exports['Regression - SAX - Should not accept binary input (GH-269)'] = () => {
- const stream = new SAXParser();
- const buf = Buffer.from('test');
-
- assert.throws(() => stream.write(buf), TypeError);
-};
diff --git a/packages/parse5-sax-parser/test/sax-parser.test.ts b/packages/parse5-sax-parser/test/sax-parser.test.ts
new file mode 100644
index 0000000000000000000000000000000000000000..20ee3e6da765e0bfb96035306619379335df3153
--- /dev/null
+++ b/packages/parse5-sax-parser/test/sax-parser.test.ts
@@ -0,0 +1,140 @@
+import * as assert from 'node:assert';
+import * as fs from 'node:fs';
+import type { SAXParserOptions } from '../lib/index.js';
+import { SAXParser } from '../lib/index.js';
+import { loadSAXParserTestData } from 'parse5-test-utils/utils/load-sax-parser-test-data.js';
+import {
+ finished,
+ getStringDiffMsg,
+ writeChunkedToStream,
+ removeNewLines,
+ WritableStreamStub,
+} from 'parse5-test-utils/utils/common.js';
+
+function sanitizeForComparison(str: string): string {
+ return removeNewLines(str).replace(/\s/g, '').replace(/'/g, '"').toLowerCase();
+}
+
+function createBasicTest(html: string, expected: string, options?: SAXParserOptions) {
+    return async function (): Promise<void> {
+        //NOTE: the idea of the test is to serialize back given HTML using SAXParser handlers
+        let actual = '';
+        const parser = new SAXParser(options);
+
+        parser.on('doctype', ({ name, publicId, systemId }) => {
+            actual += `<!DOCTYPE ${name} "${publicId}" "${systemId}">`;
+        });
+
+        parser.on('startTag', ({ tagName, attrs, selfClosing }) => {
+            actual += `<${tagName}`;
+            for (const attr of attrs) {
+                actual += ` ${attr.name}="${attr.value}"`;
+            }
+            actual += selfClosing ? '/>' : '>';
+        });
+
+        parser.on('endTag', ({ tagName }) => {
+            actual += `</${tagName}>`;
+        });
+
+        parser.on('text', ({ text }) => {
+            actual += text;
+        });
+
+        parser.on('comment', ({ text }) => {
+            actual += `<!--${text}-->`;
+        });
+
+        writeChunkedToStream(html, parser);
+
+        await finished(parser);
+
+        expected = sanitizeForComparison(expected);
+        actual = sanitizeForComparison(actual);
+
+        //NOTE: use ok assertion, so output will not be polluted by the whole content of the strings
+        assert.ok(actual === expected, getStringDiffMsg(actual, expected));
+    };
+}
+
+const hugePage = new URL('../../../test/data/huge-page/huge-page.html', import.meta.url);
+
+describe('SAX parser', () => {
+ //Basic tests
+ for (const [idx, data] of loadSAXParserTestData().entries())
+ it(`${idx + 1}.${data.name}`, createBasicTest(data.src, data.expected));
+
+ it('Piping and .stop()', async () => {
+ const parser = new SAXParser();
+ const writable = new WritableStreamStub();
+ let handlerCallCount = 0;
+
+ function handler(): void {
+ handlerCallCount++;
+
+ if (handlerCallCount === 10) {
+ parser.stop();
+ }
+ }
+
+ fs.createReadStream(hugePage, 'utf8').pipe(parser).pipe(writable);
+
+ parser.on('startTag', handler);
+ parser.on('endTag', handler);
+ parser.on('doctype', handler);
+ parser.on('comment', handler);
+ parser.on('text', handler);
+
+ await finished(writable);
+
+ const expected = fs.readFileSync(hugePage).toString();
+
+ assert.strictEqual(handlerCallCount, 10);
+ assert.strictEqual(writable.writtenData, expected);
+ });
+
+ it('Parser silently exits on big files (GH-97)', () => {
+ const parser = new SAXParser();
+
+ fs.createReadStream(hugePage, 'utf8').pipe(parser);
+
+ //NOTE: This is a smoke test - in case of regression it will fail with timeout.
+ return finished(parser);
+ });
+
+ it('Last text chunk must be flushed (GH-271)', async () => {
+ const parser = new SAXParser();
+ let foundText = false;
+
+ parser.on('text', ({ text }) => {
+ foundText = true;
+ assert.strictEqual(text, 'text');
+ });
+
+ parser.write('text');
+ parser.end();
+
+ await finished(parser);
+
+ assert.ok(foundText);
+ });
+
+ it('Should not accept binary input (GH-269)', () => {
+ const stream = new SAXParser();
+ const buf = Buffer.from('test');
+
+ assert.throws(() => stream.write(buf), TypeError);
+ });
+});
diff --git a/packages/parse5-sax-parser/tsconfig.json b/packages/parse5-sax-parser/tsconfig.json
new file mode 100644
index 0000000000000000000000000000000000000000..ebf4d81b68956ff9387d6a4e366d3270c4e693d3
--- /dev/null
+++ b/packages/parse5-sax-parser/tsconfig.json
@@ -0,0 +1,9 @@
+{
+ "extends": "../../tsconfig.json",
+ "compilerOptions": {
+ "rootDir": "lib",
+ "outDir": "dist"
+ },
+ "include": ["**/*.ts"],
+ "exclude": ["**/*.test.ts", "dist", "test"]
+}
diff --git a/packages/parse5-serializer-stream/LICENSE b/packages/parse5-serializer-stream/LICENSE
deleted file mode 100644
index f3265d4b88f7aac271ec21679195adff8446855e..0000000000000000000000000000000000000000
--- a/packages/parse5-serializer-stream/LICENSE
+++ /dev/null
@@ -1,19 +0,0 @@
-Copyright (c) 2013-2019 Ivan Nikulin (ifaaan@gmail.com, https://github.com/inikulin)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
diff --git a/packages/parse5-serializer-stream/README.md b/packages/parse5-serializer-stream/README.md
deleted file mode 100644
index b8aa3c78b977248c625d7a1ab2cd9dcca2272656..0000000000000000000000000000000000000000
--- a/packages/parse5-serializer-stream/README.md
+++ /dev/null
@@ -1,34 +0,0 @@
-
-
-
-
-
-
-
-
parse5-serializer-stream
-Streaming HTML serializer.
-
-
-
-
-npm install --save parse5-serializer-stream
-
-
-
-
- 📖 Documentation 📖
-
-
----
-
-
- List of parse5 toolset packages
-
-
-
- GitHub
-
-
-
- Version history
-
diff --git a/packages/parse5-serializer-stream/docs/index.md b/packages/parse5-serializer-stream/docs/index.md
deleted file mode 100644
index 9f274da88be5f2d11f627f8e8c9ec1de01a86ab1..0000000000000000000000000000000000000000
--- a/packages/parse5-serializer-stream/docs/index.md
+++ /dev/null
@@ -1,50 +0,0 @@
-# Class: SerializerStream
-
-Streaming AST node to an HTML serializer. A [readable stream](https://nodejs.org/api/stream.html#stream_class_stream_readable).
-
-*__example__*:
-
-```js
-const parse5 = require('parse5');
-const SerializerStream = require('parse5-serializer-stream');
-const fs = require('fs');
-
-const file = fs.createWriteStream('/home/index.html');
-
-// Serializes the parsed document to HTML and writes it to the file.
-const document = parse5.parse('Who is John Galt?');
-const serializer = new SerializerStream(document);
-
-serializer.pipe(file);
-```
-
-### Constructors
-
-* [constructor](#constructor)
-
-### Methods and events
-
-See: [readable stream API](https://nodejs.org/api/stream.html#stream_class_stream_readable).
-
----
-
-## Constructors
-
-
-
-### constructor
-
-⊕ **new SerializerStream**(node: *Node*, options?: *[SerializerOptions](../../parse5/docs/options/serializer-options.md)*): [SerializerStream]()
-
-Streaming AST node to an HTML serializer. A readable stream.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| node | Node | Node to serialize. |
-| `Optional` options | [SerializerOptions](../../parse5/docs/options/serializer-options.md) | Serialization options. |
-
-**Returns:** [SerializerStream]()
-
-___
diff --git a/packages/parse5-serializer-stream/lib/index.js b/packages/parse5-serializer-stream/lib/index.js
deleted file mode 100644
index 08bfb8d40740e65d9ae184cec07e356714532adb..0000000000000000000000000000000000000000
--- a/packages/parse5-serializer-stream/lib/index.js
+++ /dev/null
@@ -1,29 +0,0 @@
-'use strict';
-
-const { Readable } = require('stream');
-const Serializer = require('parse5/lib/serializer');
-
-class SerializerStream extends Readable {
- constructor(node, options) {
- super({ encoding: 'utf8' });
-
- this.serializer = new Serializer(node, options);
-
- Object.defineProperty(this.serializer, 'html', {
- //NOTE: To make `+=` concat operator work properly we define
- //getter which always returns empty string
- get: function() {
- return '';
- },
- set: this.push.bind(this)
- });
- }
-
- //Readable stream implementation
- _read() {
- this.serializer.serialize();
- this.push(null);
- }
-}
-
-module.exports = SerializerStream;
diff --git a/packages/parse5-serializer-stream/package.json b/packages/parse5-serializer-stream/package.json
deleted file mode 100644
index 2c2948adaef5b48cece599309b1849eea0192fa9..0000000000000000000000000000000000000000
--- a/packages/parse5-serializer-stream/package.json
+++ /dev/null
@@ -1,28 +0,0 @@
-{
- "name": "parse5-serializer-stream",
- "description": "Streaming HTML serializer.",
- "version": "6.0.1",
- "author": "Ivan Nikulin (https://github.com/inikulin)",
- "contributors": "https://github.com/inikulin/parse5/graphs/contributors",
- "homepage": "https://github.com/inikulin/parse5",
- "keywords": [
- "parse5",
- "parser",
- "stream",
- "streaming",
- "serializer",
- "serialiser"
- ],
- "license": "MIT",
- "main": "./lib/index.js",
- "dependencies": {
- "parse5": "^6.0.1"
- },
- "repository": {
- "type": "git",
- "url": "git://github.com/inikulin/parse5.git"
- },
- "files": [
- "lib"
- ]
-}
diff --git a/packages/parse5-serializer-stream/test/serializer-stream.test.js b/packages/parse5-serializer-stream/test/serializer-stream.test.js
deleted file mode 100644
index 71e5161d7f0d1d083159f2451a6ca8158b08408a..0000000000000000000000000000000000000000
--- a/packages/parse5-serializer-stream/test/serializer-stream.test.js
+++ /dev/null
@@ -1,16 +0,0 @@
-'use strict';
-
-const SerializerStream = require('../lib');
-const generateSeriliazerTests = require('../../../test/utils/generate-serializer-tests');
-const { WritableStreamStub } = require('../../../test/utils/common');
-
-generateSeriliazerTests(exports, 'SeriliazerStream', (document, opts) => {
- const stream = new SerializerStream(document, opts);
- const writable = new WritableStreamStub();
-
- stream.pipe(writable);
-
- return new Promise(resolve => {
- writable.once('finish', () => resolve(writable.writtenData));
- });
-});
diff --git a/packages/parse5/README.md b/packages/parse5/README.md
index 32b53be63deb9b0c9d752462f3fc330eb87cf359..139f8c6ae03cd80b0696e78a3d79c5132aeb1d19 100644
--- a/packages/parse5/README.md
+++ b/packages/parse5/README.md
@@ -16,7 +16,7 @@
- 📖 Documentation 📖
+ 📖 Documentation 📖
---
@@ -34,5 +34,5 @@
- Version history
+ Changelog
diff --git a/packages/parse5/docs/index.md b/packages/parse5/docs/index.md
deleted file mode 100644
index f869de1e7bd6ad11898c7baba65b98320f68d4d8..0000000000000000000000000000000000000000
--- a/packages/parse5/docs/index.md
+++ /dev/null
@@ -1,111 +0,0 @@
-# parse5
-
-> **NOTE:** By default all functions operate with [tree format](tree-adapter/default/interface-list.md) produced
-> by the default tree adapter. Tree format can be changed by providing custom [tree adapter](tree-adapter/interface.md) implementation.
-
-### Functions
-
-* [parse](#parse)
-* [parseFragment](#parsefragment)
-* [serialize](#serialize)
-
-
-
-### parse
-
-▸ **parse**(html: _`string`_, options?: _[ParserOptions](options/parser-options.md)_): Document
-
-Parses an HTML string.
-
-_**example**_:
-
-```js
-const parse5 = require('parse5');
-
-const document = parse5.parse('Hi there!');
-
-console.log(document.childNodes[1].tagName); //> 'html'
-```
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------------------ | -------------------------------------------------------------------------------------------------------------- | ------------------ |
-| html | `string` | Input HTML string. |
-| `Optional` options | [ParserOptions](options/parser-options.md) | Parsing options. |
-
-**Returns:** Document
-
----
-
-
-
-### parseFragment
-
-▸ **parseFragment**(fragmentContext: _Element_, html: _`string`_, options?: _[ParserOptions](options/parser-options.md)_): DocumentFragment
-
-▸ **parseFragment**(html: _`string`_, options?: _[ParserOptions](options/parser-options.md)_): DocumentFragment
-
-Parses an HTML fragment.
-
-_**example**_:
-
-```js
-const parse5 = require('parse5');
-
-const documentFragment = parse5.parseFragment('');
-
-console.log(documentFragment.childNodes[0].tagName); //> 'table'
-
-// Parses the html fragment in the context of the parsed element.
-const trFragment = parser.parseFragment(documentFragment.childNodes[0], 'Shake it, baby |
');
-
-console.log(trFragment.childNodes[0].childNodes[0].tagName); //> 'td'
-```
-
-**Parameters:**
-
-| Param | Type | Description |
-| -------------------------- | -------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- |
-| `Optional` fragmentContext | Element | Parsing context element. If specified, given fragment will be parsed as if it was set to the context element's \`innerHTML\` property. |
-| html | `string` | Input HTML fragment string. |
-| `Optional` options | [ParserOptions](options/parser-options.md) | Parsing options. |
-
-**Returns:** DocumentFragment
-
----
-
-
-
-### serialize
-
-▸ **serialize**(node: _Node_, options?: _[SerializerOptions](options/serializer-options.md)_): `string`
-
-Serializes an AST node to an HTML string.
-
-_**example**_:
-
-```js
-const parse5 = require('parse5');
-
-const document = parse5.parse('Hi there!');
-
-// Serializes a document.
-const html = parse5.serialize(document);
-
-// Serializes the element content.
-const str = parse5.serialize(document.childNodes[1]);
-
-console.log(str); //> 'Hi there!'
-```
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------------------ | ---------------------------------------------------------------------------------------------------------------------- | ---------------------- |
-| node | Node | Node to serialize. |
-| `Optional` options | [SerializerOptions](options/serializer-options.md) | Serialization options. |
-
-**Returns:** `string`
-
----
diff --git a/packages/parse5/docs/options/parser-options.md b/packages/parse5/docs/options/parser-options.md
deleted file mode 100644
index 0c492dd1838026052d8f9e8c57cf38433bd4b83e..0000000000000000000000000000000000000000
--- a/packages/parse5/docs/options/parser-options.md
+++ /dev/null
@@ -1,48 +0,0 @@
-# Interface: ParserOptions
-
-### Properties
-
-* [sourceCodeLocationInfo](#sourcecodelocationinfo)
-* [scriptingEnabled](#scriptingenabled)
-* [treeAdapter](#treeadapter)
-
----
-
-## Properties
-
-
-
-### `` sourceCodeLocationInfo
-
-**● sourceCodeLocationInfo**: *`boolean`*
-
-Enables source code location information. When enabled, each node (except the root node) will have a `sourceCodeLocation` property (property name can be different depending on [tree adapter](../tree-adapter/interface.md) that has been used, hereinafter property names for the [DefaultTreeAdapter](../tree-adapter/interface-list.md) will be given). If the node is not an empty element, `sourceCodeLocation` will be a [ElementLocation](../source-code-location/element-location.md) object, otherwise it will be [Location](../source-code-location/location.md). If the element was implicitly created by the parser (as part of [tree correction](https://html.spec.whatwg.org/multipage/syntax.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser)), its `sourceCodeLocation` property will be `undefined`.
-
-**Default:** `false`
-
-___
-
-
-### `` scriptingEnabled
-
-**● scriptingEnabled**: *`boolean`*
-
-The [scripting flag](https://html.spec.whatwg.org/multipage/parsing.html#scripting-flag). If set to
-`true`, `noscript` element content will be parsed as text.
-
-**Default:** `true`
-
-___
-
-
-
-### `` treeAdapter
-
-**● treeAdapter**: *[TreeAdapter](../tree-adapter/interface.md)*
-
-Specifies the resulting tree format.
-
-**Default:** [DefaultTreeAdapter](../tree-adapter/default/interface-list.md)
-
-___
-
diff --git a/packages/parse5/docs/options/serializer-options.md b/packages/parse5/docs/options/serializer-options.md
deleted file mode 100644
index aa55c16dace2ce22d8cf2e342a115f985543ca77..0000000000000000000000000000000000000000
--- a/packages/parse5/docs/options/serializer-options.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# Interface: SerializerOptions
-
-### Properties
-
-* [treeAdapter](#treeadapter)
-
----
-
-## Properties
-
-
-
-### `` treeAdapter
-
-**● treeAdapter**: *[TreeAdapter](../tree-adapter/interface.md)*
-
-Specifies input tree format.
-
-**Default:** [DefaultTreeAdapter](../tree-adapter/default/interface-list.md).
-
-___
-
diff --git a/packages/parse5/docs/source-code-location/element-location.md b/packages/parse5/docs/source-code-location/element-location.md
deleted file mode 100644
index c05bd67bf5e28befd4187341c3d557480f9d5f78..0000000000000000000000000000000000000000
--- a/packages/parse5/docs/source-code-location/element-location.md
+++ /dev/null
@@ -1,100 +0,0 @@
-# Interface: ElementLocation
-
-### Properties
-
-* [attrs](#attrs)
-* [endCol](#endcol)
-* [endOffset](#endoffset)
-* [endLine](#endline)
-* [endTag](#endtag)
-* [startCol](#startcol)
-* [startOffset](#startoffset)
-* [startLine](#startline)
-* [startTag](#starttag)
-
----
-
-## Properties
-
-
-
-### attrs
-
-**● attrs**: *\[attributeName: `string`\]: [Location](location.md)*
-
-Start tag attributes' location info
-
-___
-
-
-### endCol
-
-**● endCol**: *`number`*
-
-One-based column index of the last character
-
-___
-
-
-### endOffset
-
-**● endOffset**: *`number`*
-
-Zero-based last character index
-
-___
-
-
-### endLine
-
-**● endLine**: *`number`*
-
-One-based line index of the last character
-
-___
-
-
-### endTag
-
-**● endTag**: *[Location](location.md)*
-
-Element's end tag location info.
-
-___
-
-
-### startCol
-
-**● startCol**: *`number`*
-
-One-based column index of the first character
-
-___
-
-
-### startOffset
-
-**● startOffset**: *`number`*
-
-Zero-based first character index
-
-___
-
-
-### startLine
-
-**● startLine**: *`number`*
-
-One-based line index of the first character
-
-___
-
-
-### startTag
-
-**● startTag**: *[StartTagLocation](start-tag-location.md)*
-
-Element's start tag location info.
-
-___
-
diff --git a/packages/parse5/docs/source-code-location/end-location.md b/packages/parse5/docs/source-code-location/end-location.md
deleted file mode 100644
index 8b3bd57db6240842619d927ccd96c92686b81b15..0000000000000000000000000000000000000000
--- a/packages/parse5/docs/source-code-location/end-location.md
+++ /dev/null
@@ -1,48 +0,0 @@
-# Interface: EndLocation
-
-### Properties
-
-* [endCol](#endcol)
-* [endOffset](#endoffset)
-* [endLine](#endline)
-* [endTag](#endtag)
-
----
-
-## Properties
-
-
-
-### endCol
-
-**● endCol**: *`number`*
-
-One-based column index of the last character
-
-___
-
-
-### endOffset
-
-**● endOffset**: *`number`*
-
-Zero-based last character index
-
-___
-
-
-### endLine
-
-**● endLine**: *`number`*
-
-One-based line index of the last character
-
-___
-
-
-### endTag
-
-**● endTag**: *[Location](location.md)|undefined*
-
-Element's end tag location info.
-This property is undefined, if the element has no closing tag.
\ No newline at end of file
diff --git a/packages/parse5/docs/source-code-location/location.md b/packages/parse5/docs/source-code-location/location.md
deleted file mode 100644
index 316f49c0aa8b6ff573692ec4c6b5408b1e374806..0000000000000000000000000000000000000000
--- a/packages/parse5/docs/source-code-location/location.md
+++ /dev/null
@@ -1,69 +0,0 @@
-# Interface: Location
-
-### Properties
-
-* [endCol](#endcol)
-* [endOffset](#endoffset)
-* [endLine](#endline)
-* [startCol](#startcol)
-* [startOffset](#startoffset)
-* [startLine](#startline)
-
----
-
-## Properties
-
-
-
-### endCol
-
-**● endCol**: *`number`*
-
-One-based column index of the last character
-
-___
-
-
-### endOffset
-
-**● endOffset**: *`number`*
-
-Zero-based last character index
-
-___
-
-
-### endLine
-
-**● endLine**: *`number`*
-
-One-based line index of the last character
-
-___
-
-
-### startCol
-
-**● startCol**: *`number`*
-
-One-based column index of the first character
-
-___
-
-
-### startOffset
-
-**● startOffset**: *`number`*
-
-Zero-based first character index
-
-___
-
-
-### startLine
-
-**● startLine**: *`number`*
-
-One-based line index of the first character
-
-___
diff --git a/packages/parse5/docs/source-code-location/start-tag-location.md b/packages/parse5/docs/source-code-location/start-tag-location.md
deleted file mode 100644
index 7fc08e57b59bf4921f09e09d3d82f60acb0927be..0000000000000000000000000000000000000000
--- a/packages/parse5/docs/source-code-location/start-tag-location.md
+++ /dev/null
@@ -1,79 +0,0 @@
-# Interface: StartTagLocation
-
-### Properties
-
-* [attrs](#attrs)
-* [endCol](#endcol)
-* [endOffset](#endoffset)
-* [endLine](#endline)
-* [startCol](#startcol)
-* [startOffset](#startoffset)
-* [startLine](#startline)
-
----
-
-## Properties
-
-
-
-### attrs
-
-**● attrs**: *\[attributeName: `string`\]: [Location](location.md)*
-
-Start tag attributes' location info
-
-___
-
-
-### endCol
-
-**● endCol**: *`number`*
-
-One-based column index of the last character
-
-___
-
-
-### endOffset
-
-**● endOffset**: *`number`*
-
-Zero-based last character index
-
-___
-
-
-### endLine
-
-**● endLine**: *`number`*
-
-One-based line index of the last character
-
-___
-
-
-### startCol
-
-**● startCol**: *`number`*
-
-One-based column index of the first character
-
-___
-
-
-### startOffset
-
-**● startOffset**: *`number`*
-
-Zero-based first character index
-
-___
-
-
-### startLine
-
-**● startLine**: *`number`*
-
-One-based line index of the first character
-
-___
diff --git a/packages/parse5/docs/tree-adapter/default/attribute.md b/packages/parse5/docs/tree-adapter/default/attribute.md
deleted file mode 100644
index f1915d761408a47406c1c77085e2ccbc9e931513..0000000000000000000000000000000000000000
--- a/packages/parse5/docs/tree-adapter/default/attribute.md
+++ /dev/null
@@ -1,50 +0,0 @@
-# Interface: Attribute
-
-### Properties
-
-* [name](#name)
-* [namespace](#namespace)
-* [prefix](#prefix)
-* [value](#value)
-
----
-
-## Properties
-
-
-
-### name
-
-**● name**: *`string`*
-
-The name of the attribute.
-
-___
-
-
-### `` namespace
-
-**● namespace**: *`string`*
-
-The namespace of the attribute.
-
-___
-
-
-### `` prefix
-
-**● prefix**: *`string`*
-
-The namespace-related prefix of the attribute.
-
-___
-
-
-### value
-
-**● value**: *`string`*
-
-The value of the attribute.
-
-___
-
diff --git a/packages/parse5/docs/tree-adapter/default/comment-node.md b/packages/parse5/docs/tree-adapter/default/comment-node.md
deleted file mode 100644
index bc2d5ac02525f2795811e458b6f1a6db5904ef15..0000000000000000000000000000000000000000
--- a/packages/parse5/docs/tree-adapter/default/comment-node.md
+++ /dev/null
@@ -1,49 +0,0 @@
-# Interface: CommentNode
-
-### Properties
-
-* [data](#data)
-* [nodeName](#nodename)
-* [parentNode](#parentnode)
-* [sourceCodeLocation](#sourcecodelocation)
-
----
-
-## Properties
-
-
-
-### data
-
-**● data**: *`string`*
-
-Comment text.
-
-___
-
-
-### nodeName
-
-**● nodeName**: *"#comment"*
-
-The name of the node.
-
-___
-
-
-### parentNode
-
-**● parentNode**: *Node*
-
-Parent node.
-
-___
-
-
-### `` sourceCodeLocation
-
-**● sourceCodeLocation**: *[Location](../../source-code-location/location.md)*
-
-Comment source code location info. Available if location info is enabled via [ParserOptions](../../options/parser-options.md).
-
-___
diff --git a/packages/parse5/docs/tree-adapter/default/document-fragment.md b/packages/parse5/docs/tree-adapter/default/document-fragment.md
deleted file mode 100644
index 30514ce78a6c10482ce52f74ce718494e994f7ad..0000000000000000000000000000000000000000
--- a/packages/parse5/docs/tree-adapter/default/document-fragment.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# Interface: DocumentFragment
-
-### Properties
-
-* [childNodes](#childnodes)
-* [nodeName](#nodename)
-
----
-
-## Properties
-
-
-
-### childNodes
-
-**● childNodes**: *Node[]*
-
-Child nodes.
-
-___
-
-
-### nodeName
-
-**● nodeName**: *"#document-fragment"*
-
-The name of the node.
-
-___
-
diff --git a/packages/parse5/docs/tree-adapter/default/document-type.md b/packages/parse5/docs/tree-adapter/default/document-type.md
deleted file mode 100644
index f8f05bdbd468953ce750fe43ed7995cacf0e619c..0000000000000000000000000000000000000000
--- a/packages/parse5/docs/tree-adapter/default/document-type.md
+++ /dev/null
@@ -1,50 +0,0 @@
-# Interface: DocumentType
-
-### Properties
-
-* [name](#name)
-* [nodeName](#nodename)
-* [publicId](#publicid)
-* [systemId](#systemid)
-
----
-
-## Properties
-
-
-
-### name
-
-**● name**: *`string`*
-
-Document type name.
-
-___
-
-
-### nodeName
-
-**● nodeName**: *"#documentType"*
-
-The name of the node.
-
-___
-
-
-### publicId
-
-**● publicId**: *`string`*
-
-Document type public identifier.
-
-___
-
-
-### systemId
-
-**● systemId**: *`string`*
-
-Document type system identifier.
-
-___
-
diff --git a/packages/parse5/docs/tree-adapter/default/document.md b/packages/parse5/docs/tree-adapter/default/document.md
deleted file mode 100644
index 18b2d69f5959b4cf9ee6acab934ceb7063f6e7f4..0000000000000000000000000000000000000000
--- a/packages/parse5/docs/tree-adapter/default/document.md
+++ /dev/null
@@ -1,40 +0,0 @@
-# Interface: Document
-
-### Properties
-
-* [childNodes](#childnodes)
-* [mode](#mode)
-* [nodeName](#nodename)
-
----
-
-## Properties
-
-
-
-### childNodes
-
-**● childNodes**: *Node[]*
-
-Child nodes.
-
-___
-
-
-### mode
-
-**● mode**: *"no-quirks" | "quirks" | "limited-quirks"*
-
-[Document mode](https://dom.spec.whatwg.org/#concept-document-limited-quirks).
-
-___
-
-
-### nodeName
-
-**● nodeName**: *"#document"*
-
-The name of the node.
-
-___
-
diff --git a/packages/parse5/docs/tree-adapter/default/element.md b/packages/parse5/docs/tree-adapter/default/element.md
deleted file mode 100644
index df24926cfb7af3932a04a0196b192853f4257bdf..0000000000000000000000000000000000000000
--- a/packages/parse5/docs/tree-adapter/default/element.md
+++ /dev/null
@@ -1,80 +0,0 @@
-# Interface: Element
-
-### Properties
-
-* [attrs](#attrs)
-* [childNodes](#childnodes)
-* [namespaceURI](#namespaceuri)
-* [nodeName](#nodename)
-* [parentNode](#parentnode)
-* [sourceCodeLocation](#sourcecodelocation)
-* [tagName](#tagname)
-
----
-
-## Properties
-
-
-
-### attrs
-
-**● attrs**: *[Attribute](attribute.md)[]*
-
-List of element attributes.
-
-___
-
-
-### childNodes
-
-**● childNodes**: *Node[]*
-
-Child nodes.
-
-___
-
-
-### namespaceURI
-
-**● namespaceURI**: *`string`*
-
-Element namespace.
-
-___
-
-
-### nodeName
-
-**● nodeName**: *`string`*
-
-The name of the node. Equals to element [tagName](#tagname).
-
-___
-
-
-### parentNode
-
-**● parentNode**: *Node*
-
-Parent node.
-
-___
-
-
-### `` sourceCodeLocation
-
-**● sourceCodeLocation**: *[ElementLocation](../../source-code-location/element-location.md)*
-
-Element source code location info. Available if location info is enabled via [ParserOptions](../../options/parser-options.md).
-
-___
-
-
-### tagName
-
-**● tagName**: *`string`*
-
-Element tag name.
-
-___
-
diff --git a/packages/parse5/docs/tree-adapter/default/interface-list.md b/packages/parse5/docs/tree-adapter/default/interface-list.md
deleted file mode 100644
index 78e0ea61c16562e69bb10d351f781c3f8fc00bd0..0000000000000000000000000000000000000000
--- a/packages/parse5/docs/tree-adapter/default/interface-list.md
+++ /dev/null
@@ -1,12 +0,0 @@
-# List of tree interfaces produced by default [tree adapter](../interface.md)
-
-* [Attribute](attribute.md)
-* [CommentNode](comment-node.md)
-* [Document](document.md)
-* [DocumentFragment](document-fragment.md)
-* [DocumentType](document-type.md)
-* [Element](element.md)
-* [TextNode](text-node.md)
-
----
-
diff --git a/packages/parse5/docs/tree-adapter/default/text-node.md b/packages/parse5/docs/tree-adapter/default/text-node.md
deleted file mode 100644
index 7fc08beda086c46a54edfe3c119118a0c188e167..0000000000000000000000000000000000000000
--- a/packages/parse5/docs/tree-adapter/default/text-node.md
+++ /dev/null
@@ -1,51 +0,0 @@
-# Interface: TextNode
-
-### Properties
-
-* [nodeName](#nodename)
-* [parentNode](#parentnode)
-* [sourceCodeLocation](#sourcecodelocation)
-* [value](#value)
-
----
-
-## Properties
-
-
-
-### nodeName
-
-**● nodeName**: *"#text"*
-
-The name of the node.
-
-___
-
-
-### parentNode
-
-**● parentNode**: *Node*
-
-Parent node.
-
-___
-
-
-### `` sourceCodeLocation
-
-**● sourceCodeLocation**: *[Location](../../source-code-location/location.md)*
-
-Text node source code location info. Available if location info is enabled via [ParserOptions](../../optiona/parser-options.md).
-
-___
-
-
-
-### value
-
-**● value**: *`string`*
-
-Text content.
-
-___
-
diff --git a/packages/parse5/docs/tree-adapter/interface.md b/packages/parse5/docs/tree-adapter/interface.md
deleted file mode 100644
index 238fb190ba02fe611d0c4c0c7d66af156ef9d81b..0000000000000000000000000000000000000000
--- a/packages/parse5/docs/tree-adapter/interface.md
+++ /dev/null
@@ -1,608 +0,0 @@
-# Interface: TreeAdapter
-
-Tree adapter is a set of utility functions that provides minimal required abstraction layer beetween parser and a specific AST format. Note that `TreeAdapter` is not designed to be a general purpose AST manipulation library. You can build such library on top of existing `TreeAdapter` or use one of the existing libraries from npm.
-
-*__See__*: [default implementation](https://github.com/inikulin/parse5/blob/master/packages/parse5/lib/tree-adapters/default.js)
-
-### Methods
-
-* [adoptAttributes](#adoptattributes)
-* [appendChild](#appendchild)
-* [createCommentNode](#createcommentnode)
-* [createDocument](#createdocument)
-* [createDocumentFragment](#createdocumentfragment)
-* [createElement](#createelement)
-* [detachNode](#detachnode)
-* [getAttrList](#getattrlist)
-* [getChildNodes](#getchildnodes)
-* [getCommentNodeContent](#getcommentnodecontent)
-* [getDocumentMode](#getdocumentmode)
-* [getDocumentTypeNodeName](#getdocumenttypenodename)
-* [getDocumentTypeNodePublicId](#getdocumenttypenodepublicid)
-* [getDocumentTypeNodeSystemId](#getdocumenttypenodesystemid)
-* [getFirstChild](#getfirstchild)
-* [getNamespaceURI](#getnamespaceuri)
-* [getNodeSourceCodeLocation](#getnodesourcecodelocation)
-* [getParentNode](#getparentnode)
-* [getTagName](#gettagname)
-* [getTemplateContent](#gettemplatecontent)
-* [getTextNodeContent](#gettextnodecontent)
-* [insertBefore](#insertbefore)
-* [insertText](#inserttext)
-* [insertTextBefore](#inserttextbefore)
-* [isCommentNode](#iscommentnode)
-* [isDocumentTypeNode](#isdocumenttypenode)
-* [isElementNode](#iselementnode)
-* [isTextNode](#istextnode)
-* [setDocumentMode](#setdocumentmode)
-* [setDocumentType](#setdocumenttype)
-* [setNodeSourceCodeLocation](#setnodesourcecodelocation)
-* [setTemplateContent](#settemplatecontent)
-* [updateNodeSourceCodeLocation](#updatenodesourcecodelocation)
----
-
-## Methods
-
-
-
-### adoptAttributes
-
-▸ **adoptAttributes**(recipient: *Element*, attrs: *Attribute[]*): `void`
-
-Copies attributes to the given element. Only attributes that are not yet present in the element are copied.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| recipient | Element | Element to copy attributes into. |
-| attrs | Attribute[] | Attributes to copy. |
-
-**Returns:** `void`
-
-___
-
-
-### appendChild
-
-▸ **appendChild**(parentNode: *Node*, newNode: *Node*): `void`
-
-Appends a child node to the given parent node.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| parentNode | ParentNode | Parent node. |
-| newNode | Node | Child node. |
-
-**Returns:** `void`
-
-___
-
-
-### createCommentNode
-
-▸ **createCommentNode**(data: *`string`*): CommentNode
-
-Creates a comment node.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| data | `string` | Comment text. |
-
-**Returns:** CommentNode
-
-___
-
-
-### createDocument
-
-▸ **createDocument**(): Document
-
-Creates a document node.
-
-**Returns:** Document
-
-___
-
-
-### createDocumentFragment
-
-▸ **createDocumentFragment**(): DocumentFragment
-
-Creates a document fragment node.
-
-**Returns:** DocumentFragment
-
-___
-
-
-### createElement
-
-▸ **createElement**(tagName: *`string`*, namespaceURI: *`string`*, attrs: *Attribute[]*): Element
-
-Creates an element node.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| tagName | `string` | Tag name of the element. |
-| namespaceURI | `string` | Namespace of the element. |
-| attrs | Attribute[] | Attribute name-value pair array. Foreign attributes may contain \`namespace\` and \`prefix\` fields as well. |
-
-**Returns:** Element
-
-___
-
-
-### detachNode
-
-▸ **detachNode**(node: *Node*): `void`
-
-Removes a node from its parent.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| node | Node | Node to remove. |
-
-**Returns:** `void`
-
-___
-
-
-### getAttrList
-
-▸ **getAttrList**(element: *Element*): Attribute[]
-
-Returns the given element's attributes in an array, in the form of name-value pairs. Foreign attributes may contain `namespace` and `prefix` fields as well.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| element | Element | Element. |
-
-**Returns:** Attribute[]
-
-___
-
-
-### getChildNodes
-
-▸ **getChildNodes**(node: *Node*): Node[]
-
-Returns the given node's children in an array.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| node | ParentNode | Node. |
-
-**Returns:** Node[]
-
-___
-
-
-### getCommentNodeContent
-
-▸ **getCommentNodeContent**(commentNode: *CommentNode*): `string`
-
-Returns the given comment node's content.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| commentNode | CommentNode | Comment node. |
-
-**Returns:** `string`
-
-___
-
-
-### getDocumentMode
-
-▸ **getDocumentMode**(document: *Document*): *"no-quirks" | "quirks" | "limited-quirks"*
-
-Returns [document mode](https://dom.spec.whatwg.org/#concept-document-limited-quirks).
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| document | Document | Document node. |
-
-**Returns:** *"no-quirks" | "quirks" | "limited-quirks"*
-
-___
-
-
-### getDocumentTypeNodeName
-
-▸ **getDocumentTypeNodeName**(doctypeNode: *DocumentType*): `string`
-
-Returns the given document type node's name.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| doctypeNode | DocumentType | Document type node. |
-
-**Returns:** `string`
-
-___
-
-
-### getDocumentTypeNodePublicId
-
-▸ **getDocumentTypeNodePublicId**(doctypeNode: *DocumentType*): `string`
-
-Returns the given document type node's public identifier.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| doctypeNode | DocumentType | Document type node. |
-
-**Returns:** `string`
-
-___
-
-
-### getDocumentTypeNodeSystemId
-
-▸ **getDocumentTypeNodeSystemId**(doctypeNode: *DocumentType*): `string`
-
-Returns the given document type node's system identifier.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| doctypeNode | DocumentType | Document type node. |
-
-**Returns:** `string`
-
-___
-
-
-### getFirstChild
-
-▸ **getFirstChild**(node: *Node*): Node
-
-Returns the first child of the given node.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| node | ParentNode | Node. |
-
-**Returns:** Node
-
-___
-
-
-### getNamespaceURI
-
-▸ **getNamespaceURI**(element: *Element*): `string`
-
-Returns the given element's namespace.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| element | Element | Element. |
-
-**Returns:** `string`
-
-___
-
-
-### getNodeSourceCodeLocation
-
-▸ **getNodeSourceCodeLocation**(node: *Node*): [Location](../source-code-location/location.md) | [ElementLocation](../source-code-location/element-location.md)
-
-Returns the given node's source code location information.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| node | Node | Node. |
-
-**Returns:** [Location](../source-code-location/location.md) | [ElementLocation](../source-code-location/element-location.md)
-
-___
-
-
-### getParentNode
-
-▸ **getParentNode**(node: *Node*): ParentNode
-
-Returns the given node's parent.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| node | Node | Node. |
-
-**Returns:** ParentNode
-
-___
-
-
-### getTagName
-
-▸ **getTagName**(element: *Element*): `string`
-
-Returns the given element's tag name.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| element | Element | Element. |
-
-**Returns:** `string`
-
-___
-
-
-### getTemplateContent
-
-▸ **getTemplateContent**(templateElement: *Element*): DocumentFragment
-
-Returns the `` element content element.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| templateElement | Element | `` element. |
-
-**Returns:** DocumentFragment
-
-___
-
-
-### getTextNodeContent
-
-▸ **getTextNodeContent**(textNode: *TextNode*): `string`
-
-Returns the given text node's content.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| textNode | TextNode | Text node. |
-
-**Returns:** `string`
-
-___
-
-
-### insertBefore
-
-▸ **insertBefore**(parentNode: *Node*, newNode: *Node*, referenceNode: *Node*): `void`
-
-Inserts a child node to the given parent node before the given reference node.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| parentNode | ParentNode | Parent node. |
-| newNode | Node | Child node. |
-| referenceNode | Node | Reference node. |
-
-**Returns:** `void`
-
-___
-
-
-### insertText
-
-▸ **insertText**(parentNode: *Node*, text: *`string`*): `void`
-
-Inserts text into a node. If the last child of the node is a text node, the provided text will be appended to the text node content. Otherwise, inserts a new text node with the given text.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| parentNode | ParentNode | Node to insert text into. |
-| text | `string` | Text to insert. |
-
-**Returns:** `void`
-
-___
-
-
-### insertTextBefore
-
-▸ **insertTextBefore**(parentNode: *Node*, text: *`string`*, referenceNode: *Node*): `void`
-
-Inserts text into a sibling node that goes before the reference node. If this sibling node is the text node, the provided text will be appended to the text node content. Otherwise, inserts a new sibling text node with the given text before the reference node.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| parentNode | ParentNode | Node to insert text into. |
-| text | `string` | Text to insert. |
-| referenceNode | Node | Node to insert text before. |
-
-**Returns:** `void`
-
-___
-
-
-### isCommentNode
-
-▸ **isCommentNode**(node: *Node*): `boolean`
-
-Determines if the given node is a comment node.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| node | Node | Node. |
-
-**Returns:** `boolean`
-
-___
-
-
-### isDocumentTypeNode
-
-▸ **isDocumentTypeNode**(node: *Node*): `boolean`
-
-Determines if the given node is a document type node.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| node | Node | Node. |
-
-**Returns:** `boolean`
-
-___
-
-
-### isElementNode
-
-▸ **isElementNode**(node: *Node*): `boolean`
-
-Determines if the given node is an element.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| node | Node | Node. |
-
-**Returns:** `boolean`
-
-___
-
-
-### isTextNode
-
-▸ **isTextNode**(node: *Node*): `boolean`
-
-Determines if the given node is a text node.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| node | Node | Node. |
-
-**Returns:** `boolean`
-
-___
-
-
-### setDocumentMode
-
-▸ **setDocumentMode**(document: *Document*, mode: *"no-quirks" | "quirks" | "limited-quirks"*): `void`
-
-Sets the [document mode](https://dom.spec.whatwg.org/#concept-document-limited-quirks).
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| document | Document | Document node. |
-| mode | *"no-quirks" | "quirks" | "limited-quirks"* | Document mode. |
-
-**Returns:** `void`
-
-___
-
-
-### setDocumentType
-
-▸ **setDocumentType**(document: *Document*, name: *`string`*, publicId: *`string`*, systemId: *`string`*): `void`
-
-Sets the document type. If the `document` already contains a document type node, the `name`, `publicId` and `systemId` properties of this node will be updated with the provided values. Otherwise, creates a new document type node with the given properties and inserts it into the `document`.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| document | Document | Document node. |
-| name | `string` | Document type name. |
-| publicId | `string` | Document type public identifier. |
-| systemId | `string` | Document type system identifier. |
-
-**Returns:** `void`
-
-___
-
-
-### setNodeSourceCodeLocation
-
-▸ **setNodeSourceCodeLocation**(node: *Node*, location: *[Location](../source-code-location/location.md) | [ElementLocation](../source-code-location/element-location.md)*): `void`
-
-Attaches source code location information to the node.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| node | Node | Node. |
-| location | [Location](../source-code-location/location.md) | [ElementLocation](../source-code-location/element-location.md) | Source code location information. |
-
-**Returns:** `void`
-
-___
-
-
-### setTemplateContent
-
-▸ **setTemplateContent**(templateElement: *Element*, contentElement: *DocumentFragment*): `void`
-
-Sets the `` element content element.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| templateElement | Element | `` element. |
-| contentElement | DocumentFragment | Content element. |
-
-**Returns:** `void`
-___
-
-
-### updateNodeSourceCodeLocation
-
-▸ **updateNodeSourceCodeLocation**(node: *Node*, endLocation: *[EndLocation](../source-code-location/end-location.md)*): `void`
-
-Updates the source code location of nodes.
-
-**Parameters:**
-
-| Param | Type | Description |
-| ------ | ------ | ------ |
-| node | Node | Node. |
-| endLocation | [EndLocation](../source-code-location/end-location.md) | Source code location information of the end of the node. |
-
-**Returns:** `void`
-___
diff --git a/packages/parse5/lib/common/doctype.js b/packages/parse5/lib/common/doctype.ts
similarity index 73%
rename from packages/parse5/lib/common/doctype.js
rename to packages/parse5/lib/common/doctype.ts
index e9dfb67d1caf2fb17f0efe9e5d18394e200f8981..a4c1b9382cdfab0b6a9f1901bf5add4be4db01b1 100644
--- a/packages/parse5/lib/common/doctype.js
+++ b/packages/parse5/lib/common/doctype.ts
@@ -1,6 +1,5 @@
-'use strict';
-
-const { DOCUMENT_MODE } = require('./html');
+import { DOCUMENT_MODE } from './html.js';
+import type { DoctypeToken } from './token.js';
//Const
const VALID_DOCTYPE_NAME = 'html';
@@ -62,65 +61,59 @@ const QUIRKS_MODE_PUBLIC_ID_PREFIXES = [
'-//w3c//dtd w3 html//',
'-//w3o//dtd w3 html 3.0//',
'-//webtechs//dtd mozilla html 2.0//',
- '-//webtechs//dtd mozilla html//'
+ '-//webtechs//dtd mozilla html//',
];
-const QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES = QUIRKS_MODE_PUBLIC_ID_PREFIXES.concat([
+const QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES = [
+ ...QUIRKS_MODE_PUBLIC_ID_PREFIXES,
'-//w3c//dtd html 4.01 frameset//',
- '-//w3c//dtd html 4.01 transitional//'
-]);
+ '-//w3c//dtd html 4.01 transitional//',
+];
-const QUIRKS_MODE_PUBLIC_IDS = ['-//w3o//dtd w3 html strict 3.0//en//', '-/w3c/dtd html 4.0 transitional/en', 'html'];
+const QUIRKS_MODE_PUBLIC_IDS = new Set([
+ '-//w3o//dtd w3 html strict 3.0//en//',
+ '-/w3c/dtd html 4.0 transitional/en',
+ 'html',
+]);
const LIMITED_QUIRKS_PUBLIC_ID_PREFIXES = ['-//w3c//dtd xhtml 1.0 frameset//', '-//w3c//dtd xhtml 1.0 transitional//'];
-const LIMITED_QUIRKS_WITH_SYSTEM_ID_PUBLIC_ID_PREFIXES = LIMITED_QUIRKS_PUBLIC_ID_PREFIXES.concat([
+const LIMITED_QUIRKS_WITH_SYSTEM_ID_PUBLIC_ID_PREFIXES = [
+ ...LIMITED_QUIRKS_PUBLIC_ID_PREFIXES,
'-//w3c//dtd html 4.01 frameset//',
- '-//w3c//dtd html 4.01 transitional//'
-]);
+ '-//w3c//dtd html 4.01 transitional//',
+];
//Utils
-function enquoteDoctypeId(id) {
- const quote = id.indexOf('"') !== -1 ? "'" : '"';
-
- return quote + id + quote;
-}
-
-function hasPrefix(publicId, prefixes) {
- for (let i = 0; i < prefixes.length; i++) {
- if (publicId.indexOf(prefixes[i]) === 0) {
- return true;
- }
- }
-
- return false;
+function hasPrefix(publicId: string, prefixes: string[]): boolean {
+ return prefixes.some((prefix) => publicId.startsWith(prefix));
}
//API
-exports.isConforming = function(token) {
+export function isConforming(token: DoctypeToken): boolean {
return (
token.name === VALID_DOCTYPE_NAME &&
token.publicId === null &&
(token.systemId === null || token.systemId === VALID_SYSTEM_ID)
);
-};
+}
-exports.getDocumentMode = function(token) {
+export function getDocumentMode(token: DoctypeToken): DOCUMENT_MODE {
if (token.name !== VALID_DOCTYPE_NAME) {
return DOCUMENT_MODE.QUIRKS;
}
- const systemId = token.systemId;
+ const { systemId } = token;
if (systemId && systemId.toLowerCase() === QUIRKS_MODE_SYSTEM_ID) {
return DOCUMENT_MODE.QUIRKS;
}
- let publicId = token.publicId;
+ let { publicId } = token;
if (publicId !== null) {
publicId = publicId.toLowerCase();
- if (QUIRKS_MODE_PUBLIC_IDS.indexOf(publicId) > -1) {
+ if (QUIRKS_MODE_PUBLIC_IDS.has(publicId)) {
return DOCUMENT_MODE.QUIRKS;
}
@@ -139,24 +132,4 @@ exports.getDocumentMode = function(token) {
}
return DOCUMENT_MODE.NO_QUIRKS;
-};
-
-exports.serializeContent = function(name, publicId, systemId) {
- let str = '!DOCTYPE ';
-
- if (name) {
- str += name;
- }
-
- if (publicId) {
- str += ' PUBLIC ' + enquoteDoctypeId(publicId);
- } else if (systemId) {
- str += ' SYSTEM';
- }
-
- if (systemId !== null) {
- str += ' ' + enquoteDoctypeId(systemId);
- }
-
- return str;
-};
+}
diff --git a/packages/parse5/lib/common/error-codes.js b/packages/parse5/lib/common/error-codes.js
deleted file mode 100644
index 25f3cfdd629e82759e6234adf91e43cf785ac3b6..0000000000000000000000000000000000000000
--- a/packages/parse5/lib/common/error-codes.js
+++ /dev/null
@@ -1,65 +0,0 @@
-'use strict';
-
-module.exports = {
- controlCharacterInInputStream: 'control-character-in-input-stream',
- noncharacterInInputStream: 'noncharacter-in-input-stream',
- surrogateInInputStream: 'surrogate-in-input-stream',
- nonVoidHtmlElementStartTagWithTrailingSolidus: 'non-void-html-element-start-tag-with-trailing-solidus',
- endTagWithAttributes: 'end-tag-with-attributes',
- endTagWithTrailingSolidus: 'end-tag-with-trailing-solidus',
- unexpectedSolidusInTag: 'unexpected-solidus-in-tag',
- unexpectedNullCharacter: 'unexpected-null-character',
- unexpectedQuestionMarkInsteadOfTagName: 'unexpected-question-mark-instead-of-tag-name',
- invalidFirstCharacterOfTagName: 'invalid-first-character-of-tag-name',
- unexpectedEqualsSignBeforeAttributeName: 'unexpected-equals-sign-before-attribute-name',
- missingEndTagName: 'missing-end-tag-name',
- unexpectedCharacterInAttributeName: 'unexpected-character-in-attribute-name',
- unknownNamedCharacterReference: 'unknown-named-character-reference',
- missingSemicolonAfterCharacterReference: 'missing-semicolon-after-character-reference',
- unexpectedCharacterAfterDoctypeSystemIdentifier: 'unexpected-character-after-doctype-system-identifier',
- unexpectedCharacterInUnquotedAttributeValue: 'unexpected-character-in-unquoted-attribute-value',
- eofBeforeTagName: 'eof-before-tag-name',
- eofInTag: 'eof-in-tag',
- missingAttributeValue: 'missing-attribute-value',
- missingWhitespaceBetweenAttributes: 'missing-whitespace-between-attributes',
- missingWhitespaceAfterDoctypePublicKeyword: 'missing-whitespace-after-doctype-public-keyword',
- missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers:
- 'missing-whitespace-between-doctype-public-and-system-identifiers',
- missingWhitespaceAfterDoctypeSystemKeyword: 'missing-whitespace-after-doctype-system-keyword',
- missingQuoteBeforeDoctypePublicIdentifier: 'missing-quote-before-doctype-public-identifier',
- missingQuoteBeforeDoctypeSystemIdentifier: 'missing-quote-before-doctype-system-identifier',
- missingDoctypePublicIdentifier: 'missing-doctype-public-identifier',
- missingDoctypeSystemIdentifier: 'missing-doctype-system-identifier',
- abruptDoctypePublicIdentifier: 'abrupt-doctype-public-identifier',
- abruptDoctypeSystemIdentifier: 'abrupt-doctype-system-identifier',
- cdataInHtmlContent: 'cdata-in-html-content',
- incorrectlyOpenedComment: 'incorrectly-opened-comment',
- eofInScriptHtmlCommentLikeText: 'eof-in-script-html-comment-like-text',
- eofInDoctype: 'eof-in-doctype',
- nestedComment: 'nested-comment',
- abruptClosingOfEmptyComment: 'abrupt-closing-of-empty-comment',
- eofInComment: 'eof-in-comment',
- incorrectlyClosedComment: 'incorrectly-closed-comment',
- eofInCdata: 'eof-in-cdata',
- absenceOfDigitsInNumericCharacterReference: 'absence-of-digits-in-numeric-character-reference',
- nullCharacterReference: 'null-character-reference',
- surrogateCharacterReference: 'surrogate-character-reference',
- characterReferenceOutsideUnicodeRange: 'character-reference-outside-unicode-range',
- controlCharacterReference: 'control-character-reference',
- noncharacterCharacterReference: 'noncharacter-character-reference',
- missingWhitespaceBeforeDoctypeName: 'missing-whitespace-before-doctype-name',
- missingDoctypeName: 'missing-doctype-name',
- invalidCharacterSequenceAfterDoctypeName: 'invalid-character-sequence-after-doctype-name',
- duplicateAttribute: 'duplicate-attribute',
- nonConformingDoctype: 'non-conforming-doctype',
- missingDoctype: 'missing-doctype',
- misplacedDoctype: 'misplaced-doctype',
- endTagWithoutMatchingOpenElement: 'end-tag-without-matching-open-element',
- closingOfElementWithOpenChildElements: 'closing-of-element-with-open-child-elements',
- disallowedContentInNoscriptInHead: 'disallowed-content-in-noscript-in-head',
- openElementsLeftAfterEof: 'open-elements-left-after-eof',
- abandonedHeadElementChild: 'abandoned-head-element-child',
- misplacedStartTagForHeadElement: 'misplaced-start-tag-for-head-element',
- nestedNoscriptInHead: 'nested-noscript-in-head',
- eofInElementThatCanContainOnlyText: 'eof-in-element-that-can-contain-only-text'
-};
diff --git a/packages/parse5/lib/common/error-codes.ts b/packages/parse5/lib/common/error-codes.ts
new file mode 100644
index 0000000000000000000000000000000000000000..e8387848f02ba0ea1477cdf8958cf89848bb032f
--- /dev/null
+++ b/packages/parse5/lib/common/error-codes.ts
@@ -0,0 +1,70 @@
+import type { Location } from './token.js';
+
+export interface ParserError extends Location {
+ code: ERR;
+}
+
+export type ParserErrorHandler = (error: ParserError) => void;
+
+export enum ERR {
+ controlCharacterInInputStream = 'control-character-in-input-stream',
+ noncharacterInInputStream = 'noncharacter-in-input-stream',
+ surrogateInInputStream = 'surrogate-in-input-stream',
+ nonVoidHtmlElementStartTagWithTrailingSolidus = 'non-void-html-element-start-tag-with-trailing-solidus',
+ endTagWithAttributes = 'end-tag-with-attributes',
+ endTagWithTrailingSolidus = 'end-tag-with-trailing-solidus',
+ unexpectedSolidusInTag = 'unexpected-solidus-in-tag',
+ unexpectedNullCharacter = 'unexpected-null-character',
+ unexpectedQuestionMarkInsteadOfTagName = 'unexpected-question-mark-instead-of-tag-name',
+ invalidFirstCharacterOfTagName = 'invalid-first-character-of-tag-name',
+ unexpectedEqualsSignBeforeAttributeName = 'unexpected-equals-sign-before-attribute-name',
+ missingEndTagName = 'missing-end-tag-name',
+ unexpectedCharacterInAttributeName = 'unexpected-character-in-attribute-name',
+ unknownNamedCharacterReference = 'unknown-named-character-reference',
+ missingSemicolonAfterCharacterReference = 'missing-semicolon-after-character-reference',
+ unexpectedCharacterAfterDoctypeSystemIdentifier = 'unexpected-character-after-doctype-system-identifier',
+ unexpectedCharacterInUnquotedAttributeValue = 'unexpected-character-in-unquoted-attribute-value',
+ eofBeforeTagName = 'eof-before-tag-name',
+ eofInTag = 'eof-in-tag',
+ missingAttributeValue = 'missing-attribute-value',
+ missingWhitespaceBetweenAttributes = 'missing-whitespace-between-attributes',
+ missingWhitespaceAfterDoctypePublicKeyword = 'missing-whitespace-after-doctype-public-keyword',
+ missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers = 'missing-whitespace-between-doctype-public-and-system-identifiers',
+ missingWhitespaceAfterDoctypeSystemKeyword = 'missing-whitespace-after-doctype-system-keyword',
+ missingQuoteBeforeDoctypePublicIdentifier = 'missing-quote-before-doctype-public-identifier',
+ missingQuoteBeforeDoctypeSystemIdentifier = 'missing-quote-before-doctype-system-identifier',
+ missingDoctypePublicIdentifier = 'missing-doctype-public-identifier',
+ missingDoctypeSystemIdentifier = 'missing-doctype-system-identifier',
+ abruptDoctypePublicIdentifier = 'abrupt-doctype-public-identifier',
+ abruptDoctypeSystemIdentifier = 'abrupt-doctype-system-identifier',
+ cdataInHtmlContent = 'cdata-in-html-content',
+ incorrectlyOpenedComment = 'incorrectly-opened-comment',
+ eofInScriptHtmlCommentLikeText = 'eof-in-script-html-comment-like-text',
+ eofInDoctype = 'eof-in-doctype',
+ nestedComment = 'nested-comment',
+ abruptClosingOfEmptyComment = 'abrupt-closing-of-empty-comment',
+ eofInComment = 'eof-in-comment',
+ incorrectlyClosedComment = 'incorrectly-closed-comment',
+ eofInCdata = 'eof-in-cdata',
+ absenceOfDigitsInNumericCharacterReference = 'absence-of-digits-in-numeric-character-reference',
+ nullCharacterReference = 'null-character-reference',
+ surrogateCharacterReference = 'surrogate-character-reference',
+ characterReferenceOutsideUnicodeRange = 'character-reference-outside-unicode-range',
+ controlCharacterReference = 'control-character-reference',
+ noncharacterCharacterReference = 'noncharacter-character-reference',
+ missingWhitespaceBeforeDoctypeName = 'missing-whitespace-before-doctype-name',
+ missingDoctypeName = 'missing-doctype-name',
+ invalidCharacterSequenceAfterDoctypeName = 'invalid-character-sequence-after-doctype-name',
+ duplicateAttribute = 'duplicate-attribute',
+ nonConformingDoctype = 'non-conforming-doctype',
+ missingDoctype = 'missing-doctype',
+ misplacedDoctype = 'misplaced-doctype',
+ endTagWithoutMatchingOpenElement = 'end-tag-without-matching-open-element',
+ closingOfElementWithOpenChildElements = 'closing-of-element-with-open-child-elements',
+ disallowedContentInNoscriptInHead = 'disallowed-content-in-noscript-in-head',
+ openElementsLeftAfterEof = 'open-elements-left-after-eof',
+ abandonedHeadElementChild = 'abandoned-head-element-child',
+ misplacedStartTagForHeadElement = 'misplaced-start-tag-for-head-element',
+ nestedNoscriptInHead = 'nested-noscript-in-head',
+ eofInElementThatCanContainOnlyText = 'eof-in-element-that-can-contain-only-text',
+}
diff --git a/packages/parse5/lib/common/foreign-content.js b/packages/parse5/lib/common/foreign-content.js
deleted file mode 100644
index 10f008b86813dabe0ac58db40772b54a45d17365..0000000000000000000000000000000000000000
--- a/packages/parse5/lib/common/foreign-content.js
+++ /dev/null
@@ -1,265 +0,0 @@
-'use strict';
-
-const Tokenizer = require('../tokenizer');
-const HTML = require('./html');
-
-//Aliases
-const $ = HTML.TAG_NAMES;
-const NS = HTML.NAMESPACES;
-const ATTRS = HTML.ATTRS;
-
-//MIME types
-const MIME_TYPES = {
- TEXT_HTML: 'text/html',
- APPLICATION_XML: 'application/xhtml+xml'
-};
-
-//Attributes
-const DEFINITION_URL_ATTR = 'definitionurl';
-const ADJUSTED_DEFINITION_URL_ATTR = 'definitionURL';
-const SVG_ATTRS_ADJUSTMENT_MAP = {
- attributename: 'attributeName',
- attributetype: 'attributeType',
- basefrequency: 'baseFrequency',
- baseprofile: 'baseProfile',
- calcmode: 'calcMode',
- clippathunits: 'clipPathUnits',
- diffuseconstant: 'diffuseConstant',
- edgemode: 'edgeMode',
- filterunits: 'filterUnits',
- glyphref: 'glyphRef',
- gradienttransform: 'gradientTransform',
- gradientunits: 'gradientUnits',
- kernelmatrix: 'kernelMatrix',
- kernelunitlength: 'kernelUnitLength',
- keypoints: 'keyPoints',
- keysplines: 'keySplines',
- keytimes: 'keyTimes',
- lengthadjust: 'lengthAdjust',
- limitingconeangle: 'limitingConeAngle',
- markerheight: 'markerHeight',
- markerunits: 'markerUnits',
- markerwidth: 'markerWidth',
- maskcontentunits: 'maskContentUnits',
- maskunits: 'maskUnits',
- numoctaves: 'numOctaves',
- pathlength: 'pathLength',
- patterncontentunits: 'patternContentUnits',
- patterntransform: 'patternTransform',
- patternunits: 'patternUnits',
- pointsatx: 'pointsAtX',
- pointsaty: 'pointsAtY',
- pointsatz: 'pointsAtZ',
- preservealpha: 'preserveAlpha',
- preserveaspectratio: 'preserveAspectRatio',
- primitiveunits: 'primitiveUnits',
- refx: 'refX',
- refy: 'refY',
- repeatcount: 'repeatCount',
- repeatdur: 'repeatDur',
- requiredextensions: 'requiredExtensions',
- requiredfeatures: 'requiredFeatures',
- specularconstant: 'specularConstant',
- specularexponent: 'specularExponent',
- spreadmethod: 'spreadMethod',
- startoffset: 'startOffset',
- stddeviation: 'stdDeviation',
- stitchtiles: 'stitchTiles',
- surfacescale: 'surfaceScale',
- systemlanguage: 'systemLanguage',
- tablevalues: 'tableValues',
- targetx: 'targetX',
- targety: 'targetY',
- textlength: 'textLength',
- viewbox: 'viewBox',
- viewtarget: 'viewTarget',
- xchannelselector: 'xChannelSelector',
- ychannelselector: 'yChannelSelector',
- zoomandpan: 'zoomAndPan'
-};
-
-const XML_ATTRS_ADJUSTMENT_MAP = {
- 'xlink:actuate': { prefix: 'xlink', name: 'actuate', namespace: NS.XLINK },
- 'xlink:arcrole': { prefix: 'xlink', name: 'arcrole', namespace: NS.XLINK },
- 'xlink:href': { prefix: 'xlink', name: 'href', namespace: NS.XLINK },
- 'xlink:role': { prefix: 'xlink', name: 'role', namespace: NS.XLINK },
- 'xlink:show': { prefix: 'xlink', name: 'show', namespace: NS.XLINK },
- 'xlink:title': { prefix: 'xlink', name: 'title', namespace: NS.XLINK },
- 'xlink:type': { prefix: 'xlink', name: 'type', namespace: NS.XLINK },
- 'xml:base': { prefix: 'xml', name: 'base', namespace: NS.XML },
- 'xml:lang': { prefix: 'xml', name: 'lang', namespace: NS.XML },
- 'xml:space': { prefix: 'xml', name: 'space', namespace: NS.XML },
- xmlns: { prefix: '', name: 'xmlns', namespace: NS.XMLNS },
- 'xmlns:xlink': { prefix: 'xmlns', name: 'xlink', namespace: NS.XMLNS }
-};
-
-//SVG tag names adjustment map
-const SVG_TAG_NAMES_ADJUSTMENT_MAP = (exports.SVG_TAG_NAMES_ADJUSTMENT_MAP = {
- altglyph: 'altGlyph',
- altglyphdef: 'altGlyphDef',
- altglyphitem: 'altGlyphItem',
- animatecolor: 'animateColor',
- animatemotion: 'animateMotion',
- animatetransform: 'animateTransform',
- clippath: 'clipPath',
- feblend: 'feBlend',
- fecolormatrix: 'feColorMatrix',
- fecomponenttransfer: 'feComponentTransfer',
- fecomposite: 'feComposite',
- feconvolvematrix: 'feConvolveMatrix',
- fediffuselighting: 'feDiffuseLighting',
- fedisplacementmap: 'feDisplacementMap',
- fedistantlight: 'feDistantLight',
- feflood: 'feFlood',
- fefunca: 'feFuncA',
- fefuncb: 'feFuncB',
- fefuncg: 'feFuncG',
- fefuncr: 'feFuncR',
- fegaussianblur: 'feGaussianBlur',
- feimage: 'feImage',
- femerge: 'feMerge',
- femergenode: 'feMergeNode',
- femorphology: 'feMorphology',
- feoffset: 'feOffset',
- fepointlight: 'fePointLight',
- fespecularlighting: 'feSpecularLighting',
- fespotlight: 'feSpotLight',
- fetile: 'feTile',
- feturbulence: 'feTurbulence',
- foreignobject: 'foreignObject',
- glyphref: 'glyphRef',
- lineargradient: 'linearGradient',
- radialgradient: 'radialGradient',
- textpath: 'textPath'
-});
-
-//Tags that causes exit from foreign content
-const EXITS_FOREIGN_CONTENT = {
- [$.B]: true,
- [$.BIG]: true,
- [$.BLOCKQUOTE]: true,
- [$.BODY]: true,
- [$.BR]: true,
- [$.CENTER]: true,
- [$.CODE]: true,
- [$.DD]: true,
- [$.DIV]: true,
- [$.DL]: true,
- [$.DT]: true,
- [$.EM]: true,
- [$.EMBED]: true,
- [$.H1]: true,
- [$.H2]: true,
- [$.H3]: true,
- [$.H4]: true,
- [$.H5]: true,
- [$.H6]: true,
- [$.HEAD]: true,
- [$.HR]: true,
- [$.I]: true,
- [$.IMG]: true,
- [$.LI]: true,
- [$.LISTING]: true,
- [$.MENU]: true,
- [$.META]: true,
- [$.NOBR]: true,
- [$.OL]: true,
- [$.P]: true,
- [$.PRE]: true,
- [$.RUBY]: true,
- [$.S]: true,
- [$.SMALL]: true,
- [$.SPAN]: true,
- [$.STRONG]: true,
- [$.STRIKE]: true,
- [$.SUB]: true,
- [$.SUP]: true,
- [$.TABLE]: true,
- [$.TT]: true,
- [$.U]: true,
- [$.UL]: true,
- [$.VAR]: true
-};
-
-//Check exit from foreign content
-exports.causesExit = function(startTagToken) {
- const tn = startTagToken.tagName;
- const isFontWithAttrs =
- tn === $.FONT &&
- (Tokenizer.getTokenAttr(startTagToken, ATTRS.COLOR) !== null ||
- Tokenizer.getTokenAttr(startTagToken, ATTRS.SIZE) !== null ||
- Tokenizer.getTokenAttr(startTagToken, ATTRS.FACE) !== null);
-
- return isFontWithAttrs ? true : EXITS_FOREIGN_CONTENT[tn];
-};
-
-//Token adjustments
-exports.adjustTokenMathMLAttrs = function(token) {
- for (let i = 0; i < token.attrs.length; i++) {
- if (token.attrs[i].name === DEFINITION_URL_ATTR) {
- token.attrs[i].name = ADJUSTED_DEFINITION_URL_ATTR;
- break;
- }
- }
-};
-
-exports.adjustTokenSVGAttrs = function(token) {
- for (let i = 0; i < token.attrs.length; i++) {
- const adjustedAttrName = SVG_ATTRS_ADJUSTMENT_MAP[token.attrs[i].name];
-
- if (adjustedAttrName) {
- token.attrs[i].name = adjustedAttrName;
- }
- }
-};
-
-exports.adjustTokenXMLAttrs = function(token) {
- for (let i = 0; i < token.attrs.length; i++) {
- const adjustedAttrEntry = XML_ATTRS_ADJUSTMENT_MAP[token.attrs[i].name];
-
- if (adjustedAttrEntry) {
- token.attrs[i].prefix = adjustedAttrEntry.prefix;
- token.attrs[i].name = adjustedAttrEntry.name;
- token.attrs[i].namespace = adjustedAttrEntry.namespace;
- }
- }
-};
-
-exports.adjustTokenSVGTagName = function(token) {
- const adjustedTagName = SVG_TAG_NAMES_ADJUSTMENT_MAP[token.tagName];
-
- if (adjustedTagName) {
- token.tagName = adjustedTagName;
- }
-};
-
-//Integration points
-function isMathMLTextIntegrationPoint(tn, ns) {
- return ns === NS.MATHML && (tn === $.MI || tn === $.MO || tn === $.MN || tn === $.MS || tn === $.MTEXT);
-}
-
-function isHtmlIntegrationPoint(tn, ns, attrs) {
- if (ns === NS.MATHML && tn === $.ANNOTATION_XML) {
- for (let i = 0; i < attrs.length; i++) {
- if (attrs[i].name === ATTRS.ENCODING) {
- const value = attrs[i].value.toLowerCase();
-
- return value === MIME_TYPES.TEXT_HTML || value === MIME_TYPES.APPLICATION_XML;
- }
- }
- }
-
- return ns === NS.SVG && (tn === $.FOREIGN_OBJECT || tn === $.DESC || tn === $.TITLE);
-}
-
-exports.isIntegrationPoint = function(tn, ns, attrs, foreignNS) {
- if ((!foreignNS || foreignNS === NS.HTML) && isHtmlIntegrationPoint(tn, ns, attrs)) {
- return true;
- }
-
- if ((!foreignNS || foreignNS === NS.MATHML) && isMathMLTextIntegrationPoint(tn, ns)) {
- return true;
- }
-
- return false;
-};
diff --git a/packages/parse5/lib/common/foreign-content.ts b/packages/parse5/lib/common/foreign-content.ts
new file mode 100644
index 0000000000000000000000000000000000000000..891e833299145bb79d5025a831f4c8e40aa490f4
--- /dev/null
+++ b/packages/parse5/lib/common/foreign-content.ts
@@ -0,0 +1,256 @@
+import { TAG_ID as $, NS, ATTRS, getTagID } from './html.js';
+import type { TagToken, Attribute } from './token.js';
+
+//MIME types
+const MIME_TYPES = {
+ TEXT_HTML: 'text/html',
+ APPLICATION_XML: 'application/xhtml+xml',
+};
+
+//Attributes
+const DEFINITION_URL_ATTR = 'definitionurl';
+const ADJUSTED_DEFINITION_URL_ATTR = 'definitionURL';
+const SVG_ATTRS_ADJUSTMENT_MAP = new Map(
+ [
+ 'attributeName',
+ 'attributeType',
+ 'baseFrequency',
+ 'baseProfile',
+ 'calcMode',
+ 'clipPathUnits',
+ 'diffuseConstant',
+ 'edgeMode',
+ 'filterUnits',
+ 'glyphRef',
+ 'gradientTransform',
+ 'gradientUnits',
+ 'kernelMatrix',
+ 'kernelUnitLength',
+ 'keyPoints',
+ 'keySplines',
+ 'keyTimes',
+ 'lengthAdjust',
+ 'limitingConeAngle',
+ 'markerHeight',
+ 'markerUnits',
+ 'markerWidth',
+ 'maskContentUnits',
+ 'maskUnits',
+ 'numOctaves',
+ 'pathLength',
+ 'patternContentUnits',
+ 'patternTransform',
+ 'patternUnits',
+ 'pointsAtX',
+ 'pointsAtY',
+ 'pointsAtZ',
+ 'preserveAlpha',
+ 'preserveAspectRatio',
+ 'primitiveUnits',
+ 'refX',
+ 'refY',
+ 'repeatCount',
+ 'repeatDur',
+ 'requiredExtensions',
+ 'requiredFeatures',
+ 'specularConstant',
+ 'specularExponent',
+ 'spreadMethod',
+ 'startOffset',
+ 'stdDeviation',
+ 'stitchTiles',
+ 'surfaceScale',
+ 'systemLanguage',
+ 'tableValues',
+ 'targetX',
+ 'targetY',
+ 'textLength',
+ 'viewBox',
+ 'viewTarget',
+ 'xChannelSelector',
+ 'yChannelSelector',
+ 'zoomAndPan',
+ ].map((attr) => [attr.toLowerCase(), attr])
+);
+
+const XML_ATTRS_ADJUSTMENT_MAP = new Map([
+ ['xlink:actuate', { prefix: 'xlink', name: 'actuate', namespace: NS.XLINK }],
+ ['xlink:arcrole', { prefix: 'xlink', name: 'arcrole', namespace: NS.XLINK }],
+ ['xlink:href', { prefix: 'xlink', name: 'href', namespace: NS.XLINK }],
+ ['xlink:role', { prefix: 'xlink', name: 'role', namespace: NS.XLINK }],
+ ['xlink:show', { prefix: 'xlink', name: 'show', namespace: NS.XLINK }],
+ ['xlink:title', { prefix: 'xlink', name: 'title', namespace: NS.XLINK }],
+ ['xlink:type', { prefix: 'xlink', name: 'type', namespace: NS.XLINK }],
+ ['xml:base', { prefix: 'xml', name: 'base', namespace: NS.XML }],
+ ['xml:lang', { prefix: 'xml', name: 'lang', namespace: NS.XML }],
+ ['xml:space', { prefix: 'xml', name: 'space', namespace: NS.XML }],
+ ['xmlns', { prefix: '', name: 'xmlns', namespace: NS.XMLNS }],
+ ['xmlns:xlink', { prefix: 'xmlns', name: 'xlink', namespace: NS.XMLNS }],
+]);
+
+//SVG tag names adjustment map
+export const SVG_TAG_NAMES_ADJUSTMENT_MAP = new Map(
+ [
+ 'altGlyph',
+ 'altGlyphDef',
+ 'altGlyphItem',
+ 'animateColor',
+ 'animateMotion',
+ 'animateTransform',
+ 'clipPath',
+ 'feBlend',
+ 'feColorMatrix',
+ 'feComponentTransfer',
+ 'feComposite',
+ 'feConvolveMatrix',
+ 'feDiffuseLighting',
+ 'feDisplacementMap',
+ 'feDistantLight',
+ 'feFlood',
+ 'feFuncA',
+ 'feFuncB',
+ 'feFuncG',
+ 'feFuncR',
+ 'feGaussianBlur',
+ 'feImage',
+ 'feMerge',
+ 'feMergeNode',
+ 'feMorphology',
+ 'feOffset',
+ 'fePointLight',
+ 'feSpecularLighting',
+ 'feSpotLight',
+ 'feTile',
+ 'feTurbulence',
+ 'foreignObject',
+ 'glyphRef',
+ 'linearGradient',
+ 'radialGradient',
+ 'textPath',
+ ].map((tn) => [tn.toLowerCase(), tn])
+);
+
+//Tags that causes exit from foreign content
+const EXITS_FOREIGN_CONTENT = new Set([
+ $.B,
+ $.BIG,
+ $.BLOCKQUOTE,
+ $.BODY,
+ $.BR,
+ $.CENTER,
+ $.CODE,
+ $.DD,
+ $.DIV,
+ $.DL,
+ $.DT,
+ $.EM,
+ $.EMBED,
+ $.H1,
+ $.H2,
+ $.H3,
+ $.H4,
+ $.H5,
+ $.H6,
+ $.HEAD,
+ $.HR,
+ $.I,
+ $.IMG,
+ $.LI,
+ $.LISTING,
+ $.MENU,
+ $.META,
+ $.NOBR,
+ $.OL,
+ $.P,
+ $.PRE,
+ $.RUBY,
+ $.S,
+ $.SMALL,
+ $.SPAN,
+ $.STRONG,
+ $.STRIKE,
+ $.SUB,
+ $.SUP,
+ $.TABLE,
+ $.TT,
+ $.U,
+ $.UL,
+ $.VAR,
+]);
+
+//Check exit from foreign content
+export function causesExit(startTagToken: TagToken): boolean {
+ const tn = startTagToken.tagID;
+ const isFontWithAttrs =
+ tn === $.FONT &&
+ startTagToken.attrs.some(({ name }) => name === ATTRS.COLOR || name === ATTRS.SIZE || name === ATTRS.FACE);
+
+ return isFontWithAttrs || EXITS_FOREIGN_CONTENT.has(tn);
+}
+
+//Token adjustments
+export function adjustTokenMathMLAttrs(token: TagToken): void {
+ for (let i = 0; i < token.attrs.length; i++) {
+ if (token.attrs[i].name === DEFINITION_URL_ATTR) {
+ token.attrs[i].name = ADJUSTED_DEFINITION_URL_ATTR;
+ break;
+ }
+ }
+}
+
+export function adjustTokenSVGAttrs(token: TagToken): void {
+ for (let i = 0; i < token.attrs.length; i++) {
+ const adjustedAttrName = SVG_ATTRS_ADJUSTMENT_MAP.get(token.attrs[i].name);
+
+ if (adjustedAttrName != null) {
+ token.attrs[i].name = adjustedAttrName;
+ }
+ }
+}
+
+export function adjustTokenXMLAttrs(token: TagToken): void {
+ for (let i = 0; i < token.attrs.length; i++) {
+ const adjustedAttrEntry = XML_ATTRS_ADJUSTMENT_MAP.get(token.attrs[i].name);
+
+ if (adjustedAttrEntry) {
+ token.attrs[i].prefix = adjustedAttrEntry.prefix;
+ token.attrs[i].name = adjustedAttrEntry.name;
+ token.attrs[i].namespace = adjustedAttrEntry.namespace;
+ }
+ }
+}
+
+export function adjustTokenSVGTagName(token: TagToken): void {
+ const adjustedTagName = SVG_TAG_NAMES_ADJUSTMENT_MAP.get(token.tagName);
+
+ if (adjustedTagName != null) {
+ token.tagName = adjustedTagName;
+ token.tagID = getTagID(token.tagName);
+ }
+}
+
+//Integration points
+function isMathMLTextIntegrationPoint(tn: $, ns: NS): boolean {
+ return ns === NS.MATHML && (tn === $.MI || tn === $.MO || tn === $.MN || tn === $.MS || tn === $.MTEXT);
+}
+
+function isHtmlIntegrationPoint(tn: $, ns: NS, attrs: Attribute[]): boolean {
+ if (ns === NS.MATHML && tn === $.ANNOTATION_XML) {
+ for (let i = 0; i < attrs.length; i++) {
+ if (attrs[i].name === ATTRS.ENCODING) {
+ const value = attrs[i].value.toLowerCase();
+
+ return value === MIME_TYPES.TEXT_HTML || value === MIME_TYPES.APPLICATION_XML;
+ }
+ }
+ }
+
+ return ns === NS.SVG && (tn === $.FOREIGN_OBJECT || tn === $.DESC || tn === $.TITLE);
+}
+
+export function isIntegrationPoint(tn: $, ns: NS, attrs: Attribute[], foreignNS?: NS): boolean {
+ return (
+ ((!foreignNS || foreignNS === NS.HTML) && isHtmlIntegrationPoint(tn, ns, attrs)) ||
+ ((!foreignNS || foreignNS === NS.MATHML) && isMathMLTextIntegrationPoint(tn, ns))
+ );
+}
diff --git a/packages/parse5/lib/common/html.js b/packages/parse5/lib/common/html.js
deleted file mode 100644
index f33646f1194b4ed63037c39c5414d111248320f8..0000000000000000000000000000000000000000
--- a/packages/parse5/lib/common/html.js
+++ /dev/null
@@ -1,272 +0,0 @@
-'use strict';
-
-const NS = (exports.NAMESPACES = {
- HTML: 'http://www.w3.org/1999/xhtml',
- MATHML: 'http://www.w3.org/1998/Math/MathML',
- SVG: 'http://www.w3.org/2000/svg',
- XLINK: 'http://www.w3.org/1999/xlink',
- XML: 'http://www.w3.org/XML/1998/namespace',
- XMLNS: 'http://www.w3.org/2000/xmlns/'
-});
-
-exports.ATTRS = {
- TYPE: 'type',
- ACTION: 'action',
- ENCODING: 'encoding',
- PROMPT: 'prompt',
- NAME: 'name',
- COLOR: 'color',
- FACE: 'face',
- SIZE: 'size'
-};
-
-exports.DOCUMENT_MODE = {
- NO_QUIRKS: 'no-quirks',
- QUIRKS: 'quirks',
- LIMITED_QUIRKS: 'limited-quirks'
-};
-
-const $ = (exports.TAG_NAMES = {
- A: 'a',
- ADDRESS: 'address',
- ANNOTATION_XML: 'annotation-xml',
- APPLET: 'applet',
- AREA: 'area',
- ARTICLE: 'article',
- ASIDE: 'aside',
-
- B: 'b',
- BASE: 'base',
- BASEFONT: 'basefont',
- BGSOUND: 'bgsound',
- BIG: 'big',
- BLOCKQUOTE: 'blockquote',
- BODY: 'body',
- BR: 'br',
- BUTTON: 'button',
-
- CAPTION: 'caption',
- CENTER: 'center',
- CODE: 'code',
- COL: 'col',
- COLGROUP: 'colgroup',
-
- DD: 'dd',
- DESC: 'desc',
- DETAILS: 'details',
- DIALOG: 'dialog',
- DIR: 'dir',
- DIV: 'div',
- DL: 'dl',
- DT: 'dt',
-
- EM: 'em',
- EMBED: 'embed',
-
- FIELDSET: 'fieldset',
- FIGCAPTION: 'figcaption',
- FIGURE: 'figure',
- FONT: 'font',
- FOOTER: 'footer',
- FOREIGN_OBJECT: 'foreignObject',
- FORM: 'form',
- FRAME: 'frame',
- FRAMESET: 'frameset',
-
- H1: 'h1',
- H2: 'h2',
- H3: 'h3',
- H4: 'h4',
- H5: 'h5',
- H6: 'h6',
- HEAD: 'head',
- HEADER: 'header',
- HGROUP: 'hgroup',
- HR: 'hr',
- HTML: 'html',
-
- I: 'i',
- IMG: 'img',
- IMAGE: 'image',
- INPUT: 'input',
- IFRAME: 'iframe',
-
- KEYGEN: 'keygen',
-
- LABEL: 'label',
- LI: 'li',
- LINK: 'link',
- LISTING: 'listing',
-
- MAIN: 'main',
- MALIGNMARK: 'malignmark',
- MARQUEE: 'marquee',
- MATH: 'math',
- MENU: 'menu',
- META: 'meta',
- MGLYPH: 'mglyph',
- MI: 'mi',
- MO: 'mo',
- MN: 'mn',
- MS: 'ms',
- MTEXT: 'mtext',
-
- NAV: 'nav',
- NOBR: 'nobr',
- NOFRAMES: 'noframes',
- NOEMBED: 'noembed',
- NOSCRIPT: 'noscript',
-
- OBJECT: 'object',
- OL: 'ol',
- OPTGROUP: 'optgroup',
- OPTION: 'option',
-
- P: 'p',
- PARAM: 'param',
- PLAINTEXT: 'plaintext',
- PRE: 'pre',
-
- RB: 'rb',
- RP: 'rp',
- RT: 'rt',
- RTC: 'rtc',
- RUBY: 'ruby',
-
- S: 's',
- SCRIPT: 'script',
- SECTION: 'section',
- SELECT: 'select',
- SOURCE: 'source',
- SMALL: 'small',
- SPAN: 'span',
- STRIKE: 'strike',
- STRONG: 'strong',
- STYLE: 'style',
- SUB: 'sub',
- SUMMARY: 'summary',
- SUP: 'sup',
-
- TABLE: 'table',
- TBODY: 'tbody',
- TEMPLATE: 'template',
- TEXTAREA: 'textarea',
- TFOOT: 'tfoot',
- TD: 'td',
- TH: 'th',
- THEAD: 'thead',
- TITLE: 'title',
- TR: 'tr',
- TRACK: 'track',
- TT: 'tt',
-
- U: 'u',
- UL: 'ul',
-
- SVG: 'svg',
-
- VAR: 'var',
-
- WBR: 'wbr',
-
- XMP: 'xmp'
-});
-
-exports.SPECIAL_ELEMENTS = {
- [NS.HTML]: {
- [$.ADDRESS]: true,
- [$.APPLET]: true,
- [$.AREA]: true,
- [$.ARTICLE]: true,
- [$.ASIDE]: true,
- [$.BASE]: true,
- [$.BASEFONT]: true,
- [$.BGSOUND]: true,
- [$.BLOCKQUOTE]: true,
- [$.BODY]: true,
- [$.BR]: true,
- [$.BUTTON]: true,
- [$.CAPTION]: true,
- [$.CENTER]: true,
- [$.COL]: true,
- [$.COLGROUP]: true,
- [$.DD]: true,
- [$.DETAILS]: true,
- [$.DIR]: true,
- [$.DIV]: true,
- [$.DL]: true,
- [$.DT]: true,
- [$.EMBED]: true,
- [$.FIELDSET]: true,
- [$.FIGCAPTION]: true,
- [$.FIGURE]: true,
- [$.FOOTER]: true,
- [$.FORM]: true,
- [$.FRAME]: true,
- [$.FRAMESET]: true,
- [$.H1]: true,
- [$.H2]: true,
- [$.H3]: true,
- [$.H4]: true,
- [$.H5]: true,
- [$.H6]: true,
- [$.HEAD]: true,
- [$.HEADER]: true,
- [$.HGROUP]: true,
- [$.HR]: true,
- [$.HTML]: true,
- [$.IFRAME]: true,
- [$.IMG]: true,
- [$.INPUT]: true,
- [$.LI]: true,
- [$.LINK]: true,
- [$.LISTING]: true,
- [$.MAIN]: true,
- [$.MARQUEE]: true,
- [$.MENU]: true,
- [$.META]: true,
- [$.NAV]: true,
- [$.NOEMBED]: true,
- [$.NOFRAMES]: true,
- [$.NOSCRIPT]: true,
- [$.OBJECT]: true,
- [$.OL]: true,
- [$.P]: true,
- [$.PARAM]: true,
- [$.PLAINTEXT]: true,
- [$.PRE]: true,
- [$.SCRIPT]: true,
- [$.SECTION]: true,
- [$.SELECT]: true,
- [$.SOURCE]: true,
- [$.STYLE]: true,
- [$.SUMMARY]: true,
- [$.TABLE]: true,
- [$.TBODY]: true,
- [$.TD]: true,
- [$.TEMPLATE]: true,
- [$.TEXTAREA]: true,
- [$.TFOOT]: true,
- [$.TH]: true,
- [$.THEAD]: true,
- [$.TITLE]: true,
- [$.TR]: true,
- [$.TRACK]: true,
- [$.UL]: true,
- [$.WBR]: true,
- [$.XMP]: true
- },
- [NS.MATHML]: {
- [$.MI]: true,
- [$.MO]: true,
- [$.MN]: true,
- [$.MS]: true,
- [$.MTEXT]: true,
- [$.ANNOTATION_XML]: true
- },
- [NS.SVG]: {
- [$.TITLE]: true,
- [$.FOREIGN_OBJECT]: true,
- [$.DESC]: true
- }
-};
diff --git a/packages/parse5/lib/common/html.ts b/packages/parse5/lib/common/html.ts
new file mode 100644
index 0000000000000000000000000000000000000000..48e27317f64e45ffbb55126a59d3179a99126158
--- /dev/null
+++ b/packages/parse5/lib/common/html.ts
@@ -0,0 +1,570 @@
+/** All valid namespaces in HTML; each member's value is the namespace URI string itself. */
+export enum NS {
+ HTML = 'http://www.w3.org/1999/xhtml',
+ MATHML = 'http://www.w3.org/1998/Math/MathML',
+ SVG = 'http://www.w3.org/2000/svg',
+ XLINK = 'http://www.w3.org/1999/xlink',
+ XML = 'http://www.w3.org/XML/1998/namespace',
+ XMLNS = 'http://www.w3.org/2000/xmlns/',
+}
+
+export enum ATTRS { // attribute names referenced by constant (e.g. ATTRS.ENCODING in the integration-point check)
+ TYPE = 'type',
+ ACTION = 'action',
+ ENCODING = 'encoding',
+ PROMPT = 'prompt',
+ NAME = 'name',
+ COLOR = 'color',
+ FACE = 'face',
+ SIZE = 'size',
+}
+
+/**
+ * The quirks mode of the document: no-quirks, quirks, or limited-quirks.
+ *
+ * @see {@link https://dom.spec.whatwg.org/#concept-document-limited-quirks}
+ */
+export enum DOCUMENT_MODE {
+ NO_QUIRKS = 'no-quirks',
+ QUIRKS = 'quirks',
+ LIMITED_QUIRKS = 'limited-quirks',
+}
+
+export enum TAG_NAMES { // tag names as strings; paired with numeric TAG_ID values in TAG_NAME_TO_ID
+ A = 'a',
+ ADDRESS = 'address',
+ ANNOTATION_XML = 'annotation-xml',
+ APPLET = 'applet',
+ AREA = 'area',
+ ARTICLE = 'article',
+ ASIDE = 'aside',
+
+ B = 'b',
+ BASE = 'base',
+ BASEFONT = 'basefont',
+ BGSOUND = 'bgsound',
+ BIG = 'big',
+ BLOCKQUOTE = 'blockquote',
+ BODY = 'body',
+ BR = 'br',
+ BUTTON = 'button',
+
+ CAPTION = 'caption',
+ CENTER = 'center',
+ CODE = 'code',
+ COL = 'col',
+ COLGROUP = 'colgroup',
+
+ DD = 'dd',
+ DESC = 'desc',
+ DETAILS = 'details',
+ DIALOG = 'dialog',
+ DIR = 'dir',
+ DIV = 'div',
+ DL = 'dl',
+ DT = 'dt',
+
+ EM = 'em',
+ EMBED = 'embed',
+
+ FIELDSET = 'fieldset',
+ FIGCAPTION = 'figcaption',
+ FIGURE = 'figure',
+ FONT = 'font',
+ FOOTER = 'footer',
+ FOREIGN_OBJECT = 'foreignObject', // mixed case, unlike every other value in this enum
+ FORM = 'form',
+ FRAME = 'frame',
+ FRAMESET = 'frameset',
+
+ H1 = 'h1',
+ H2 = 'h2',
+ H3 = 'h3',
+ H4 = 'h4',
+ H5 = 'h5',
+ H6 = 'h6',
+ HEAD = 'head',
+ HEADER = 'header',
+ HGROUP = 'hgroup',
+ HR = 'hr',
+ HTML = 'html',
+
+ I = 'i',
+ IMG = 'img',
+ IMAGE = 'image',
+ INPUT = 'input',
+ IFRAME = 'iframe',
+
+ KEYGEN = 'keygen',
+
+ LABEL = 'label',
+ LI = 'li',
+ LINK = 'link',
+ LISTING = 'listing',
+
+ MAIN = 'main',
+ MALIGNMARK = 'malignmark',
+ MARQUEE = 'marquee',
+ MATH = 'math',
+ MENU = 'menu',
+ META = 'meta',
+ MGLYPH = 'mglyph',
+ MI = 'mi',
+ MO = 'mo',
+ MN = 'mn',
+ MS = 'ms',
+ MTEXT = 'mtext',
+
+ NAV = 'nav',
+ NOBR = 'nobr',
+ NOFRAMES = 'noframes',
+ NOEMBED = 'noembed',
+ NOSCRIPT = 'noscript',
+
+ OBJECT = 'object',
+ OL = 'ol',
+ OPTGROUP = 'optgroup',
+ OPTION = 'option',
+
+ P = 'p',
+ PARAM = 'param',
+ PLAINTEXT = 'plaintext',
+ PRE = 'pre',
+
+ RB = 'rb',
+ RP = 'rp',
+ RT = 'rt',
+ RTC = 'rtc',
+ RUBY = 'ruby',
+
+ S = 's',
+ SCRIPT = 'script',
+ SECTION = 'section',
+ SELECT = 'select',
+ SOURCE = 'source',
+ SMALL = 'small',
+ SPAN = 'span',
+ STRIKE = 'strike',
+ STRONG = 'strong',
+ STYLE = 'style',
+ SUB = 'sub',
+ SUMMARY = 'summary',
+ SUP = 'sup',
+
+ TABLE = 'table',
+ TBODY = 'tbody',
+ TEMPLATE = 'template',
+ TEXTAREA = 'textarea',
+ TFOOT = 'tfoot',
+ TD = 'td',
+ TH = 'th',
+ THEAD = 'thead',
+ TITLE = 'title',
+ TR = 'tr',
+ TRACK = 'track',
+ TT = 'tt',
+
+ U = 'u',
+ UL = 'ul',
+
+ SVG = 'svg',
+
+ VAR = 'var',
+
+ WBR = 'wbr',
+
+ XMP = 'xmp',
+}
+
+/**
+ * Tag IDs are numeric IDs for known tag names; UNKNOWN (0, the first member) covers every other name.
+ *
+ * We use tag IDs to improve the performance of tag name comparisons.
+ */
+export enum TAG_ID {
+ UNKNOWN,
+
+ A,
+ ADDRESS,
+ ANNOTATION_XML,
+ APPLET,
+ AREA,
+ ARTICLE,
+ ASIDE,
+
+ B,
+ BASE,
+ BASEFONT,
+ BGSOUND,
+ BIG,
+ BLOCKQUOTE,
+ BODY,
+ BR,
+ BUTTON,
+
+ CAPTION,
+ CENTER,
+ CODE,
+ COL,
+ COLGROUP,
+
+ DD,
+ DESC,
+ DETAILS,
+ DIALOG,
+ DIR,
+ DIV,
+ DL,
+ DT,
+
+ EM,
+ EMBED,
+
+ FIELDSET,
+ FIGCAPTION,
+ FIGURE,
+ FONT,
+ FOOTER,
+ FOREIGN_OBJECT,
+ FORM,
+ FRAME,
+ FRAMESET,
+
+ H1,
+ H2,
+ H3,
+ H4,
+ H5,
+ H6,
+ HEAD,
+ HEADER,
+ HGROUP,
+ HR,
+ HTML,
+
+ I,
+ IMG,
+ IMAGE,
+ INPUT,
+ IFRAME,
+
+ KEYGEN,
+
+ LABEL,
+ LI,
+ LINK,
+ LISTING,
+
+ MAIN,
+ MALIGNMARK,
+ MARQUEE,
+ MATH,
+ MENU,
+ META,
+ MGLYPH,
+ MI,
+ MO,
+ MN,
+ MS,
+ MTEXT,
+
+ NAV,
+ NOBR,
+ NOFRAMES,
+ NOEMBED,
+ NOSCRIPT,
+
+ OBJECT,
+ OL,
+ OPTGROUP,
+ OPTION,
+
+ P,
+ PARAM,
+ PLAINTEXT,
+ PRE,
+
+ RB,
+ RP,
+ RT,
+ RTC,
+ RUBY,
+
+ S,
+ SCRIPT,
+ SECTION,
+ SELECT,
+ SOURCE,
+ SMALL,
+ SPAN,
+ STRIKE,
+ STRONG,
+ STYLE,
+ SUB,
+ SUMMARY,
+ SUP,
+
+ TABLE,
+ TBODY,
+ TEMPLATE,
+ TEXTAREA,
+ TFOOT,
+ TD,
+ TH,
+ THEAD,
+ TITLE,
+ TR,
+ TRACK,
+ TT,
+
+ U,
+ UL,
+
+ SVG,
+
+ VAR,
+
+ WBR,
+
+ XMP,
+}
+
+const TAG_NAME_TO_ID = new Map<string, TAG_ID>([ // string-keyed so getTagID() can query arbitrary tag names
+    [TAG_NAMES.A, TAG_ID.A],
+    [TAG_NAMES.ADDRESS, TAG_ID.ADDRESS],
+    [TAG_NAMES.ANNOTATION_XML, TAG_ID.ANNOTATION_XML],
+    [TAG_NAMES.APPLET, TAG_ID.APPLET],
+    [TAG_NAMES.AREA, TAG_ID.AREA],
+    [TAG_NAMES.ARTICLE, TAG_ID.ARTICLE],
+    [TAG_NAMES.ASIDE, TAG_ID.ASIDE],
+    [TAG_NAMES.B, TAG_ID.B],
+    [TAG_NAMES.BASE, TAG_ID.BASE],
+    [TAG_NAMES.BASEFONT, TAG_ID.BASEFONT],
+    [TAG_NAMES.BGSOUND, TAG_ID.BGSOUND],
+    [TAG_NAMES.BIG, TAG_ID.BIG],
+    [TAG_NAMES.BLOCKQUOTE, TAG_ID.BLOCKQUOTE],
+    [TAG_NAMES.BODY, TAG_ID.BODY],
+    [TAG_NAMES.BR, TAG_ID.BR],
+    [TAG_NAMES.BUTTON, TAG_ID.BUTTON],
+    [TAG_NAMES.CAPTION, TAG_ID.CAPTION],
+    [TAG_NAMES.CENTER, TAG_ID.CENTER],
+    [TAG_NAMES.CODE, TAG_ID.CODE],
+    [TAG_NAMES.COL, TAG_ID.COL],
+    [TAG_NAMES.COLGROUP, TAG_ID.COLGROUP],
+    [TAG_NAMES.DD, TAG_ID.DD],
+    [TAG_NAMES.DESC, TAG_ID.DESC],
+    [TAG_NAMES.DETAILS, TAG_ID.DETAILS],
+    [TAG_NAMES.DIALOG, TAG_ID.DIALOG],
+    [TAG_NAMES.DIR, TAG_ID.DIR],
+    [TAG_NAMES.DIV, TAG_ID.DIV],
+    [TAG_NAMES.DL, TAG_ID.DL],
+    [TAG_NAMES.DT, TAG_ID.DT],
+    [TAG_NAMES.EM, TAG_ID.EM],
+    [TAG_NAMES.EMBED, TAG_ID.EMBED],
+    [TAG_NAMES.FIELDSET, TAG_ID.FIELDSET],
+    [TAG_NAMES.FIGCAPTION, TAG_ID.FIGCAPTION],
+    [TAG_NAMES.FIGURE, TAG_ID.FIGURE],
+    [TAG_NAMES.FONT, TAG_ID.FONT],
+    [TAG_NAMES.FOOTER, TAG_ID.FOOTER],
+    [TAG_NAMES.FOREIGN_OBJECT, TAG_ID.FOREIGN_OBJECT],
+    [TAG_NAMES.FORM, TAG_ID.FORM],
+    [TAG_NAMES.FRAME, TAG_ID.FRAME],
+    [TAG_NAMES.FRAMESET, TAG_ID.FRAMESET],
+    [TAG_NAMES.H1, TAG_ID.H1],
+    [TAG_NAMES.H2, TAG_ID.H2],
+    [TAG_NAMES.H3, TAG_ID.H3],
+    [TAG_NAMES.H4, TAG_ID.H4],
+    [TAG_NAMES.H5, TAG_ID.H5],
+    [TAG_NAMES.H6, TAG_ID.H6],
+    [TAG_NAMES.HEAD, TAG_ID.HEAD],
+    [TAG_NAMES.HEADER, TAG_ID.HEADER],
+    [TAG_NAMES.HGROUP, TAG_ID.HGROUP],
+    [TAG_NAMES.HR, TAG_ID.HR],
+    [TAG_NAMES.HTML, TAG_ID.HTML],
+    [TAG_NAMES.I, TAG_ID.I],
+    [TAG_NAMES.IMG, TAG_ID.IMG],
+    [TAG_NAMES.IMAGE, TAG_ID.IMAGE],
+    [TAG_NAMES.INPUT, TAG_ID.INPUT],
+    [TAG_NAMES.IFRAME, TAG_ID.IFRAME],
+    [TAG_NAMES.KEYGEN, TAG_ID.KEYGEN],
+    [TAG_NAMES.LABEL, TAG_ID.LABEL],
+    [TAG_NAMES.LI, TAG_ID.LI],
+    [TAG_NAMES.LINK, TAG_ID.LINK],
+    [TAG_NAMES.LISTING, TAG_ID.LISTING],
+    [TAG_NAMES.MAIN, TAG_ID.MAIN],
+    [TAG_NAMES.MALIGNMARK, TAG_ID.MALIGNMARK],
+    [TAG_NAMES.MARQUEE, TAG_ID.MARQUEE],
+    [TAG_NAMES.MATH, TAG_ID.MATH],
+    [TAG_NAMES.MENU, TAG_ID.MENU],
+    [TAG_NAMES.META, TAG_ID.META],
+    [TAG_NAMES.MGLYPH, TAG_ID.MGLYPH],
+    [TAG_NAMES.MI, TAG_ID.MI],
+    [TAG_NAMES.MO, TAG_ID.MO],
+    [TAG_NAMES.MN, TAG_ID.MN],
+    [TAG_NAMES.MS, TAG_ID.MS],
+    [TAG_NAMES.MTEXT, TAG_ID.MTEXT],
+    [TAG_NAMES.NAV, TAG_ID.NAV],
+    [TAG_NAMES.NOBR, TAG_ID.NOBR],
+    [TAG_NAMES.NOFRAMES, TAG_ID.NOFRAMES],
+    [TAG_NAMES.NOEMBED, TAG_ID.NOEMBED],
+    [TAG_NAMES.NOSCRIPT, TAG_ID.NOSCRIPT],
+    [TAG_NAMES.OBJECT, TAG_ID.OBJECT],
+    [TAG_NAMES.OL, TAG_ID.OL],
+    [TAG_NAMES.OPTGROUP, TAG_ID.OPTGROUP],
+    [TAG_NAMES.OPTION, TAG_ID.OPTION],
+    [TAG_NAMES.P, TAG_ID.P],
+    [TAG_NAMES.PARAM, TAG_ID.PARAM],
+    [TAG_NAMES.PLAINTEXT, TAG_ID.PLAINTEXT],
+    [TAG_NAMES.PRE, TAG_ID.PRE],
+    [TAG_NAMES.RB, TAG_ID.RB],
+    [TAG_NAMES.RP, TAG_ID.RP],
+    [TAG_NAMES.RT, TAG_ID.RT],
+    [TAG_NAMES.RTC, TAG_ID.RTC],
+    [TAG_NAMES.RUBY, TAG_ID.RUBY],
+    [TAG_NAMES.S, TAG_ID.S],
+    [TAG_NAMES.SCRIPT, TAG_ID.SCRIPT],
+    [TAG_NAMES.SECTION, TAG_ID.SECTION],
+    [TAG_NAMES.SELECT, TAG_ID.SELECT],
+    [TAG_NAMES.SOURCE, TAG_ID.SOURCE],
+    [TAG_NAMES.SMALL, TAG_ID.SMALL],
+    [TAG_NAMES.SPAN, TAG_ID.SPAN],
+    [TAG_NAMES.STRIKE, TAG_ID.STRIKE],
+    [TAG_NAMES.STRONG, TAG_ID.STRONG],
+    [TAG_NAMES.STYLE, TAG_ID.STYLE],
+    [TAG_NAMES.SUB, TAG_ID.SUB],
+    [TAG_NAMES.SUMMARY, TAG_ID.SUMMARY],
+    [TAG_NAMES.SUP, TAG_ID.SUP],
+    [TAG_NAMES.TABLE, TAG_ID.TABLE],
+    [TAG_NAMES.TBODY, TAG_ID.TBODY],
+    [TAG_NAMES.TEMPLATE, TAG_ID.TEMPLATE],
+    [TAG_NAMES.TEXTAREA, TAG_ID.TEXTAREA],
+    [TAG_NAMES.TFOOT, TAG_ID.TFOOT],
+    [TAG_NAMES.TD, TAG_ID.TD],
+    [TAG_NAMES.TH, TAG_ID.TH],
+    [TAG_NAMES.THEAD, TAG_ID.THEAD],
+    [TAG_NAMES.TITLE, TAG_ID.TITLE],
+    [TAG_NAMES.TR, TAG_ID.TR],
+    [TAG_NAMES.TRACK, TAG_ID.TRACK],
+    [TAG_NAMES.TT, TAG_ID.TT],
+    [TAG_NAMES.U, TAG_ID.U],
+    [TAG_NAMES.UL, TAG_ID.UL],
+    [TAG_NAMES.SVG, TAG_ID.SVG],
+    [TAG_NAMES.VAR, TAG_ID.VAR],
+    [TAG_NAMES.WBR, TAG_ID.WBR],
+    [TAG_NAMES.XMP, TAG_ID.XMP],
+]);
+
+export function getTagID(tagName: string): TAG_ID {
+ return TAG_NAME_TO_ID.get(tagName) ?? TAG_ID.UNKNOWN; // exact, case-sensitive lookup; names missing from the table map to UNKNOWN
+}
+
+const $ = TAG_ID; // short alias for TAG_ID used by the code that follows in this file
+
+export const SPECIAL_ELEMENTS: Record<NS, Set<TAG_ID>> = { // per-namespace sets of special tag IDs; XLINK/XML/XMLNS are empty
+    [NS.HTML]: new Set([
+        $.ADDRESS,
+        $.APPLET,
+        $.AREA,
+        $.ARTICLE,
+        $.ASIDE,
+        $.BASE,
+        $.BASEFONT,
+        $.BGSOUND,
+        $.BLOCKQUOTE,
+        $.BODY,
+        $.BR,
+        $.BUTTON,
+        $.CAPTION,
+        $.CENTER,
+        $.COL,
+        $.COLGROUP,
+        $.DD,
+        $.DETAILS,
+        $.DIR,
+        $.DIV,
+        $.DL,
+        $.DT,
+        $.EMBED,
+        $.FIELDSET,
+        $.FIGCAPTION,
+        $.FIGURE,
+        $.FOOTER,
+        $.FORM,
+        $.FRAME,
+        $.FRAMESET,
+        $.H1,
+        $.H2,
+        $.H3,
+        $.H4,
+        $.H5,
+        $.H6,
+        $.HEAD,
+        $.HEADER,
+        $.HGROUP,
+        $.HR,
+        $.HTML,
+        $.IFRAME,
+        $.IMG,
+        $.INPUT,
+        $.LI,
+        $.LINK,
+        $.LISTING,
+        $.MAIN,
+        $.MARQUEE,
+        $.MENU,
+        $.META,
+        $.NAV,
+        $.NOEMBED,
+        $.NOFRAMES,
+        $.NOSCRIPT,
+        $.OBJECT,
+        $.OL,
+        $.P,
+        $.PARAM,
+        $.PLAINTEXT,
+        $.PRE,
+        $.SCRIPT,
+        $.SECTION,
+        $.SELECT,
+        $.SOURCE,
+        $.STYLE,
+        $.SUMMARY,
+        $.TABLE,
+        $.TBODY,
+        $.TD,
+        $.TEMPLATE,
+        $.TEXTAREA,
+        $.TFOOT,
+        $.TH,
+        $.THEAD,
+        $.TITLE,
+        $.TR,
+        $.TRACK,
+        $.UL,
+        $.WBR,
+        $.XMP,
+    ]),
+    [NS.MATHML]: new Set([$.MI, $.MO, $.MN, $.MS, $.MTEXT, $.ANNOTATION_XML]),
+    [NS.SVG]: new Set([$.TITLE, $.FOREIGN_OBJECT, $.DESC]),
+    [NS.XLINK]: new Set(),
+    [NS.XML]: new Set(),
+    [NS.XMLNS]: new Set(),
+};
+
+export function isNumberedHeader(tn: TAG_ID): boolean {
+ return tn === $.H1 || tn === $.H2 || tn === $.H3 || tn === $.H4 || tn === $.H5 || tn === $.H6; // H1 through H6 only
+}
+
+const UNESCAPED_TEXT = new Set<string>([ // string-typed so hasUnescapedText() can test arbitrary tag names
+    TAG_NAMES.STYLE,
+    TAG_NAMES.SCRIPT,
+    TAG_NAMES.XMP,
+    TAG_NAMES.IFRAME,
+    TAG_NAMES.NOEMBED,
+    TAG_NAMES.NOFRAMES,
+    TAG_NAMES.PLAINTEXT,
+]);
+
+export function hasUnescapedText(tn: string, scriptingEnabled: boolean): boolean {
+ return UNESCAPED_TEXT.has(tn) || (scriptingEnabled && tn === TAG_NAMES.NOSCRIPT); // noscript counts only when scripting is enabled
+}
diff --git a/packages/parse5/lib/common/token.ts b/packages/parse5/lib/common/token.ts
new file mode 100644
index 0000000000000000000000000000000000000000..4b1a1283df33eb181e7a646d9574a6326e9faa62
--- /dev/null
+++ b/packages/parse5/lib/common/token.ts
@@ -0,0 +1,104 @@
+import type { TAG_ID } from './html.js';
+
+export enum TokenType { // discriminant stored in each token's `type` field
+ CHARACTER,
+ NULL_CHARACTER,
+ WHITESPACE_CHARACTER,
+ START_TAG,
+ END_TAG,
+ COMMENT,
+ DOCTYPE,
+ EOF,
+ HIBERNATION, // NOTE(review): no interface in the Token union uses this value — presumably tokenizer-internal; confirm
+}
+
+export interface Location { // a source span; endCol and endOffset point just past the last character
+ /** One-based line index of the first character. */
+ startLine: number;
+ /** One-based column index of the first character. */
+ startCol: number;
+ /** Zero-based first character index. */
+ startOffset: number;
+ /** One-based line index of the last character. */
+ endLine: number;
+ /** One-based column index of the last character. Points directly *after* the last character. */
+ endCol: number;
+ /** Zero-based last character index. Points directly *after* the last character. */
+ endOffset: number;
+}
+
+export interface LocationWithAttributes extends Location {
+    /** Start tag attributes' location info (string-keyed; presumably by attribute name — confirm). */
+    attrs?: Record<string, Location>;
+}
+
+/** Element span (inherited start/end fields and attrs) plus the spans of its start and end tags. */
+export interface ElementLocation extends LocationWithAttributes {
+ /** Element's start tag location info. */
+ startTag?: Location;
+ /**
+ * Element's end tag location info; undefined if the element has no closing tag.
+ */
+ endTag?: Location;
+}
+
+interface TokenBase { // fields shared by every token interface that extends it
+ readonly type: TokenType;
+ location: Location | null; // presumably null when location tracking is off — TODO confirm
+}
+
+export interface DoctypeToken extends TokenBase { // DOCTYPE token; name and both identifiers are nullable
+ readonly type: TokenType.DOCTYPE;
+ name: string | null;
+ forceQuirks: boolean;
+ publicId: string | null;
+ systemId: string | null;
+}
+
+export interface Attribute { // element type of TagToken.attrs; namespace and prefix are optional
+ /** The name of the attribute. */
+ name: string;
+ /** The namespace of the attribute. */
+ namespace?: string;
+ /** The namespace-related prefix of the attribute. */
+ prefix?: string;
+ /** The value of the attribute. */
+ value: string;
+}
+
+export interface TagToken extends TokenBase { // one shape for start and end tags; `type` tells them apart
+ readonly type: TokenType.START_TAG | TokenType.END_TAG;
+ tagName: string;
+ /** Used to cache the ID of the tag name. */
+ tagID: TAG_ID;
+ selfClosing: boolean;
+ ackSelfClosing: boolean;
+ attrs: Attribute[];
+ location: LocationWithAttributes | null;
+}
+
+/** Returns the value of the last attribute named `attrName` on `token`, or null when there is none. */
+export function getTokenAttr(token: TagToken, attrName: string): string | null {
+    let idx = token.attrs.length;
+    while (idx-- > 0) {
+        const attr = token.attrs[idx];
+        if (attr.name === attrName) return attr.value;
+    }
+    return null;
+}
+
+export interface CommentToken extends TokenBase { // comment token; `data` presumably holds the comment text — confirm
+ readonly type: TokenType.COMMENT;
+ data: string;
+}
+
+export interface EOFToken extends TokenBase { // EOF token; adds no fields beyond TokenBase
+ readonly type: TokenType.EOF;
+}
+
+export interface CharacterToken extends TokenBase {
+ type: TokenType.CHARACTER | TokenType.NULL_CHARACTER | TokenType.WHITESPACE_CHARACTER; // not readonly, unlike the other tokens — NOTE(review): presumably reassigned in place; confirm
+ chars: string;
+}
+
+export type Token = DoctypeToken | TagToken | CommentToken | EOFToken | CharacterToken; // union of every token shape; narrow on `type`
diff --git a/packages/parse5/lib/common/unicode.js b/packages/parse5/lib/common/unicode.js
deleted file mode 100644
index 8d8234f13f0333a257e01ee7b090f43dae325504..0000000000000000000000000000000000000000
--- a/packages/parse5/lib/common/unicode.js
+++ /dev/null
@@ -1,109 +0,0 @@
-'use strict';
-
-const UNDEFINED_CODE_POINTS = [
- 0xfffe,
- 0xffff,
- 0x1fffe,
- 0x1ffff,
- 0x2fffe,
- 0x2ffff,
- 0x3fffe,
- 0x3ffff,
- 0x4fffe,
- 0x4ffff,
- 0x5fffe,
- 0x5ffff,
- 0x6fffe,
- 0x6ffff,
- 0x7fffe,
- 0x7ffff,
- 0x8fffe,
- 0x8ffff,
- 0x9fffe,
- 0x9ffff,
- 0xafffe,
- 0xaffff,
- 0xbfffe,
- 0xbffff,
- 0xcfffe,
- 0xcffff,
- 0xdfffe,
- 0xdffff,
- 0xefffe,
- 0xeffff,
- 0xffffe,
- 0xfffff,
- 0x10fffe,
- 0x10ffff
-];
-
-exports.REPLACEMENT_CHARACTER = '\uFFFD';
-
-exports.CODE_POINTS = {
- EOF: -1,
- NULL: 0x00,
- TABULATION: 0x09,
- CARRIAGE_RETURN: 0x0d,
- LINE_FEED: 0x0a,
- FORM_FEED: 0x0c,
- SPACE: 0x20,
- EXCLAMATION_MARK: 0x21,
- QUOTATION_MARK: 0x22,
- NUMBER_SIGN: 0x23,
- AMPERSAND: 0x26,
- APOSTROPHE: 0x27,
- HYPHEN_MINUS: 0x2d,
- SOLIDUS: 0x2f,
- DIGIT_0: 0x30,
- DIGIT_9: 0x39,
- SEMICOLON: 0x3b,
- LESS_THAN_SIGN: 0x3c,
- EQUALS_SIGN: 0x3d,
- GREATER_THAN_SIGN: 0x3e,
- QUESTION_MARK: 0x3f,
- LATIN_CAPITAL_A: 0x41,
- LATIN_CAPITAL_F: 0x46,
- LATIN_CAPITAL_X: 0x58,
- LATIN_CAPITAL_Z: 0x5a,
- RIGHT_SQUARE_BRACKET: 0x5d,
- GRAVE_ACCENT: 0x60,
- LATIN_SMALL_A: 0x61,
- LATIN_SMALL_F: 0x66,
- LATIN_SMALL_X: 0x78,
- LATIN_SMALL_Z: 0x7a,
- REPLACEMENT_CHARACTER: 0xfffd
-};
-
-exports.CODE_POINT_SEQUENCES = {
- DASH_DASH_STRING: [0x2d, 0x2d], //--
- DOCTYPE_STRING: [0x44, 0x4f, 0x43, 0x54, 0x59, 0x50, 0x45], //DOCTYPE
- CDATA_START_STRING: [0x5b, 0x43, 0x44, 0x41, 0x54, 0x41, 0x5b], //[CDATA[
- SCRIPT_STRING: [0x73, 0x63, 0x72, 0x69, 0x70, 0x74], //script
- PUBLIC_STRING: [0x50, 0x55, 0x42, 0x4c, 0x49, 0x43], //PUBLIC
- SYSTEM_STRING: [0x53, 0x59, 0x53, 0x54, 0x45, 0x4d] //SYSTEM
-};
-
-//Surrogates
-exports.isSurrogate = function(cp) {
- return cp >= 0xd800 && cp <= 0xdfff;
-};
-
-exports.isSurrogatePair = function(cp) {
- return cp >= 0xdc00 && cp <= 0xdfff;
-};
-
-exports.getSurrogatePairCodePoint = function(cp1, cp2) {
- return (cp1 - 0xd800) * 0x400 + 0x2400 + cp2;
-};
-
-//NOTE: excluding NULL and ASCII whitespace
-exports.isControlCodePoint = function(cp) {
- return (
- (cp !== 0x20 && cp !== 0x0a && cp !== 0x0d && cp !== 0x09 && cp !== 0x0c && cp >= 0x01 && cp <= 0x1f) ||
- (cp >= 0x7f && cp <= 0x9f)
- );
-};
-
-exports.isUndefinedCodePoint = function(cp) {
- return (cp >= 0xfdd0 && cp <= 0xfdef) || UNDEFINED_CODE_POINTS.indexOf(cp) > -1;
-};
diff --git a/packages/parse5/lib/common/unicode.ts b/packages/parse5/lib/common/unicode.ts
new file mode 100644
index 0000000000000000000000000000000000000000..814f2e236140eaf13503300b003e5980f2271183
--- /dev/null
+++ b/packages/parse5/lib/common/unicode.ts
@@ -0,0 +1,77 @@
+// Noncharacters U+nFFFE and U+nFFFF for every plane n = 0x0..0x10 (34 code points in total).
+const UNDEFINED_CODE_POINTS = new Set<number>();
+for (let plane = 0; plane <= 0x10; plane++) {
+    UNDEFINED_CODE_POINTS.add(plane * 0x1_00_00 + 0xff_fe);
+    UNDEFINED_CODE_POINTS.add(plane * 0x1_00_00 + 0xff_ff);
+}
+
+export const REPLACEMENT_CHARACTER = '\uFFFD'; // U+FFFD as a string; CODE_POINTS.REPLACEMENT_CHARACTER is the numeric form
+
+export enum CODE_POINTS { // named numeric code points; EOF (-1) is a sentinel below every real code point
+ EOF = -1,
+ NULL = 0x00,
+ TABULATION = 0x09, // '\t'
+ CARRIAGE_RETURN = 0x0d, // '\r'
+ LINE_FEED = 0x0a, // '\n'
+ FORM_FEED = 0x0c, // '\f'
+ SPACE = 0x20,
+ EXCLAMATION_MARK = 0x21, // '!'
+ QUOTATION_MARK = 0x22, // '"'
+ NUMBER_SIGN = 0x23, // '#'
+ AMPERSAND = 0x26, // '&'
+ APOSTROPHE = 0x27, // "'"
+ HYPHEN_MINUS = 0x2d, // '-'
+ SOLIDUS = 0x2f, // '/'
+ DIGIT_0 = 0x30,
+ DIGIT_9 = 0x39,
+ SEMICOLON = 0x3b, // ';'
+ LESS_THAN_SIGN = 0x3c, // '<'
+ EQUALS_SIGN = 0x3d, // '='
+ GREATER_THAN_SIGN = 0x3e, // '>'
+ QUESTION_MARK = 0x3f, // '?'
+ LATIN_CAPITAL_A = 0x41,
+ LATIN_CAPITAL_F = 0x46,
+ LATIN_CAPITAL_X = 0x58,
+ LATIN_CAPITAL_Z = 0x5a,
+ RIGHT_SQUARE_BRACKET = 0x5d, // ']'
+ GRAVE_ACCENT = 0x60, // '`'
+ LATIN_SMALL_A = 0x61,
+ LATIN_SMALL_F = 0x66,
+ LATIN_SMALL_X = 0x78,
+ LATIN_SMALL_Z = 0x7a,
+ REPLACEMENT_CHARACTER = 0xff_fd, // U+FFFD
+}
+
+export const SEQUENCES = { // NOTE(review): DOCTYPE/PUBLIC/SYSTEM are lower-case here — presumably matched case-insensitively; confirm
+ DASH_DASH: '--',
+ CDATA_START: '[CDATA[',
+ DOCTYPE: 'doctype',
+ SCRIPT: 'script',
+ PUBLIC: 'public',
+ SYSTEM: 'system',
+};
+
+//Surrogates
+export function isSurrogate(cp: number): boolean {
+ return cp >= 0xd8_00 && cp <= 0xdf_ff; // U+D800..U+DFFF: lead and trail surrogates alike
+}
+
+export function isSurrogatePair(cp: number): boolean {
+ return cp >= 0xdc_00 && cp <= 0xdf_ff; // despite the name, tests one value against the trail (low) surrogate range U+DC00..U+DFFF
+}
+
+export function getSurrogatePairCodePoint(cp1: number, cp2: number): number { // cp1 = lead surrogate, cp2 = trail surrogate
+ return (cp1 - 0xd8_00) * 0x4_00 + 0x24_00 + cp2; // 0x2400 === 0x10000 - 0xDC00: plane offset and trail base folded together
+}
+
+/** Control code point test that deliberately excludes NULL and ASCII whitespace. */
+export function isControlCodePoint(cp: number): boolean {
+    const isAsciiWhitespace = cp === 0x20 || cp === 0x0a || cp === 0x0d || cp === 0x09 || cp === 0x0c;
+    const isC0Control = cp >= 0x01 && cp <= 0x1f && !isAsciiWhitespace;
+    const isDelOrC1Control = cp >= 0x7f && cp <= 0x9f;
+    return isC0Control || isDelOrC1Control;
+}
+
+export function isUndefinedCodePoint(cp: number): boolean {
+ return (cp >= 0xfd_d0 && cp <= 0xfd_ef) || UNDEFINED_CODE_POINTS.has(cp); // U+FDD0..U+FDEF plus the entries of UNDEFINED_CODE_POINTS
+}
diff --git a/packages/parse5/lib/extensions/error-reporting/mixin-base.js b/packages/parse5/lib/extensions/error-reporting/mixin-base.js
deleted file mode 100644
index 1e30cfc12e175829c49af53da2a62cd48d104eed..0000000000000000000000000000000000000000
--- a/packages/parse5/lib/extensions/error-reporting/mixin-base.js
+++ /dev/null
@@ -1,43 +0,0 @@
-'use strict';
-
-const Mixin = require('../../utils/mixin');
-
-class ErrorReportingMixinBase extends Mixin {
- constructor(host, opts) {
- super(host);
-
- this.posTracker = null;
- this.onParseError = opts.onParseError;
- }
-
- _setErrorLocation(err) {
- err.startLine = err.endLine = this.posTracker.line;
- err.startCol = err.endCol = this.posTracker.col;
- err.startOffset = err.endOffset = this.posTracker.offset;
- }
-
- _reportError(code) {
- const err = {
- code: code,
- startLine: -1,
- startCol: -1,
- startOffset: -1,
- endLine: -1,
- endCol: -1,
- endOffset: -1
- };
-
- this._setErrorLocation(err);
- this.onParseError(err);
- }
-
- _getOverriddenMethods(mxn) {
- return {
- _err(code) {
- mxn._reportError(code);
- }
- };
- }
-}
-
-module.exports = ErrorReportingMixinBase;
diff --git a/packages/parse5/lib/extensions/error-reporting/parser-mixin.js b/packages/parse5/lib/extensions/error-reporting/parser-mixin.js
deleted file mode 100644
index 107ec5a791a351ad5349ca948175ed8408ec4080..0000000000000000000000000000000000000000
--- a/packages/parse5/lib/extensions/error-reporting/parser-mixin.js
+++ /dev/null
@@ -1,52 +0,0 @@
-'use strict';
-
-const ErrorReportingMixinBase = require('./mixin-base');
-const ErrorReportingTokenizerMixin = require('./tokenizer-mixin');
-const LocationInfoTokenizerMixin = require('../location-info/tokenizer-mixin');
-const Mixin = require('../../utils/mixin');
-
-class ErrorReportingParserMixin extends ErrorReportingMixinBase {
- constructor(parser, opts) {
- super(parser, opts);
-
- this.opts = opts;
- this.ctLoc = null;
- this.locBeforeToken = false;
- }
-
- _setErrorLocation(err) {
- if (this.ctLoc) {
- err.startLine = this.ctLoc.startLine;
- err.startCol = this.ctLoc.startCol;
- err.startOffset = this.ctLoc.startOffset;
-
- err.endLine = this.locBeforeToken ? this.ctLoc.startLine : this.ctLoc.endLine;
- err.endCol = this.locBeforeToken ? this.ctLoc.startCol : this.ctLoc.endCol;
- err.endOffset = this.locBeforeToken ? this.ctLoc.startOffset : this.ctLoc.endOffset;
- }
- }
-
- _getOverriddenMethods(mxn, orig) {
- return {
- _bootstrap(document, fragmentContext) {
- orig._bootstrap.call(this, document, fragmentContext);
-
- Mixin.install(this.tokenizer, ErrorReportingTokenizerMixin, mxn.opts);
- Mixin.install(this.tokenizer, LocationInfoTokenizerMixin);
- },
-
- _processInputToken(token) {
- mxn.ctLoc = token.location;
-
- orig._processInputToken.call(this, token);
- },
-
- _err(code, options) {
- mxn.locBeforeToken = options && options.beforeToken;
- mxn._reportError(code);
- }
- };
- }
-}
-
-module.exports = ErrorReportingParserMixin;
diff --git a/packages/parse5/lib/extensions/error-reporting/preprocessor-mixin.js b/packages/parse5/lib/extensions/error-reporting/preprocessor-mixin.js
deleted file mode 100644
index 398c9661484cc224d590a6054633f3ce444e6e2a..0000000000000000000000000000000000000000
--- a/packages/parse5/lib/extensions/error-reporting/preprocessor-mixin.js
+++ /dev/null
@@ -1,24 +0,0 @@
-'use strict';
-
-const ErrorReportingMixinBase = require('./mixin-base');
-const PositionTrackingPreprocessorMixin = require('../position-tracking/preprocessor-mixin');
-const Mixin = require('../../utils/mixin');
-
-class ErrorReportingPreprocessorMixin extends ErrorReportingMixinBase {
- constructor(preprocessor, opts) {
- super(preprocessor, opts);
-
- this.posTracker = Mixin.install(preprocessor, PositionTrackingPreprocessorMixin);
- this.lastErrOffset = -1;
- }
-
- _reportError(code) {
- //NOTE: avoid reporting error twice on advance/retreat
- if (this.lastErrOffset !== this.posTracker.offset) {
- this.lastErrOffset = this.posTracker.offset;
- super._reportError(code);
- }
- }
-}
-
-module.exports = ErrorReportingPreprocessorMixin;
diff --git a/packages/parse5/lib/extensions/error-reporting/tokenizer-mixin.js b/packages/parse5/lib/extensions/error-reporting/tokenizer-mixin.js
deleted file mode 100644
index 219fcabf73975e2a1610b5bdf0008272c44768fa..0000000000000000000000000000000000000000
--- a/packages/parse5/lib/extensions/error-reporting/tokenizer-mixin.js
+++ /dev/null
@@ -1,17 +0,0 @@
-'use strict';
-
-const ErrorReportingMixinBase = require('./mixin-base');
-const ErrorReportingPreprocessorMixin = require('./preprocessor-mixin');
-const Mixin = require('../../utils/mixin');
-
-class ErrorReportingTokenizerMixin extends ErrorReportingMixinBase {
- constructor(tokenizer, opts) {
- super(tokenizer, opts);
-
- const preprocessorMixin = Mixin.install(tokenizer.preprocessor, ErrorReportingPreprocessorMixin, opts);
-
- this.posTracker = preprocessorMixin.posTracker;
- }
-}
-
-module.exports = ErrorReportingTokenizerMixin;
diff --git a/packages/parse5/lib/extensions/location-info/open-element-stack-mixin.js b/packages/parse5/lib/extensions/location-info/open-element-stack-mixin.js
deleted file mode 100644
index 765fe77c52afaf809d8c358ffa120f4fff0754ec..0000000000000000000000000000000000000000
--- a/packages/parse5/lib/extensions/location-info/open-element-stack-mixin.js
+++ /dev/null
@@ -1,35 +0,0 @@
-'use strict';
-
-const Mixin = require('../../utils/mixin');
-
-class LocationInfoOpenElementStackMixin extends Mixin {
- constructor(stack, opts) {
- super(stack);
-
- this.onItemPop = opts.onItemPop;
- }
-
- _getOverriddenMethods(mxn, orig) {
- return {
- pop() {
- mxn.onItemPop(this.current);
- orig.pop.call(this);
- },
-
- popAllUpToHtmlElement() {
- for (let i = this.stackTop; i > 0; i--) {
- mxn.onItemPop(this.items[i]);
- }
-
- orig.popAllUpToHtmlElement.call(this);
- },
-
- remove(element) {
- mxn.onItemPop(this.current);
- orig.remove.call(this, element);
- }
- };
- }
-}
-
-module.exports = LocationInfoOpenElementStackMixin;
diff --git a/packages/parse5/lib/extensions/location-info/parser-mixin.js b/packages/parse5/lib/extensions/location-info/parser-mixin.js
deleted file mode 100644
index e7d3e2da1286336d3405b4dec4b8e14bf97834f7..0000000000000000000000000000000000000000
--- a/packages/parse5/lib/extensions/location-info/parser-mixin.js
+++ /dev/null
@@ -1,223 +0,0 @@
-'use strict';
-
-const Mixin = require('../../utils/mixin');
-const Tokenizer = require('../../tokenizer');
-const LocationInfoTokenizerMixin = require('./tokenizer-mixin');
-const LocationInfoOpenElementStackMixin = require('./open-element-stack-mixin');
-const HTML = require('../../common/html');
-
-//Aliases
-const $ = HTML.TAG_NAMES;
-
-class LocationInfoParserMixin extends Mixin {
- constructor(parser) {
- super(parser);
-
- this.parser = parser;
- this.treeAdapter = this.parser.treeAdapter;
- this.posTracker = null;
- this.lastStartTagToken = null;
- this.lastFosterParentingLocation = null;
- this.currentToken = null;
- }
-
- _setStartLocation(element) {
- let loc = null;
-
- if (this.lastStartTagToken) {
- loc = Object.assign({}, this.lastStartTagToken.location);
- loc.startTag = this.lastStartTagToken.location;
- }
-
- this.treeAdapter.setNodeSourceCodeLocation(element, loc);
- }
-
- _setEndLocation(element, closingToken) {
- const loc = this.treeAdapter.getNodeSourceCodeLocation(element);
-
- if (loc) {
- if (closingToken.location) {
- const ctLoc = closingToken.location;
- const tn = this.treeAdapter.getTagName(element);
-
- // NOTE: For cases like <p> <p> </p> - First 'p' closes without a closing
- // tag and for cases like <td> <p> </td> - 'p' closes without a closing tag.
- const isClosingEndTag = closingToken.type === Tokenizer.END_TAG_TOKEN && tn === closingToken.tagName;
- const endLoc = {};
- if (isClosingEndTag) {
- endLoc.endTag = Object.assign({}, ctLoc);
- endLoc.endLine = ctLoc.endLine;
- endLoc.endCol = ctLoc.endCol;
- endLoc.endOffset = ctLoc.endOffset;
- } else {
- endLoc.endLine = ctLoc.startLine;
- endLoc.endCol = ctLoc.startCol;
- endLoc.endOffset = ctLoc.startOffset;
- }
-
- this.treeAdapter.updateNodeSourceCodeLocation(element, endLoc);
- }
- }
- }
-
- _getOverriddenMethods(mxn, orig) {
- return {
- _bootstrap(document, fragmentContext) {
- orig._bootstrap.call(this, document, fragmentContext);
-
- mxn.lastStartTagToken = null;
- mxn.lastFosterParentingLocation = null;
- mxn.currentToken = null;
-
- const tokenizerMixin = Mixin.install(this.tokenizer, LocationInfoTokenizerMixin);
-
- mxn.posTracker = tokenizerMixin.posTracker;
-
- Mixin.install(this.openElements, LocationInfoOpenElementStackMixin, {
- onItemPop: function(element) {
- mxn._setEndLocation(element, mxn.currentToken);
- }
- });
- },
-
- _runParsingLoop(scriptHandler) {
- orig._runParsingLoop.call(this, scriptHandler);
-
- // NOTE: generate location info for elements
- // that remains on open element stack
- for (let i = this.openElements.stackTop; i >= 0; i--) {
- mxn._setEndLocation(this.openElements.items[i], mxn.currentToken);
- }
- },
-
- //Token processing
- _processTokenInForeignContent(token) {
- mxn.currentToken = token;
- orig._processTokenInForeignContent.call(this, token);
- },
-
- _processToken(token) {
- mxn.currentToken = token;
- orig._processToken.call(this, token);
-
- //NOTE: <body> and <html> are never popped from the stack, so we need to updated
- //their end location explicitly.
- const requireExplicitUpdate =
- token.type === Tokenizer.END_TAG_TOKEN &&
- (token.tagName === $.HTML || (token.tagName === $.BODY && this.openElements.hasInScope($.BODY)));
-
- if (requireExplicitUpdate) {
- for (let i = this.openElements.stackTop; i >= 0; i--) {
- const element = this.openElements.items[i];
-
- if (this.treeAdapter.getTagName(element) === token.tagName) {
- mxn._setEndLocation(element, token);
- break;
- }
- }
- }
- },
-
- //Doctype
- _setDocumentType(token) {
- orig._setDocumentType.call(this, token);
-
- const documentChildren = this.treeAdapter.getChildNodes(this.document);
- const cnLength = documentChildren.length;
-
- for (let i = 0; i < cnLength; i++) {
- const node = documentChildren[i];
-
- if (this.treeAdapter.isDocumentTypeNode(node)) {
- this.treeAdapter.setNodeSourceCodeLocation(node, token.location);
- break;
- }
- }
- },
-
- //Elements
- _attachElementToTree(element) {
- //NOTE: _attachElementToTree is called from _appendElement, _insertElement and _insertTemplate methods.
- //So we will use token location stored in this methods for the element.
- mxn._setStartLocation(element);
- mxn.lastStartTagToken = null;
- orig._attachElementToTree.call(this, element);
- },
-
- _appendElement(token, namespaceURI) {
- mxn.lastStartTagToken = token;
- orig._appendElement.call(this, token, namespaceURI);
- },
-
- _insertElement(token, namespaceURI) {
- mxn.lastStartTagToken = token;
- orig._insertElement.call(this, token, namespaceURI);
- },
-
- _insertTemplate(token) {
- mxn.lastStartTagToken = token;
- orig._insertTemplate.call(this, token);
-
- const tmplContent = this.treeAdapter.getTemplateContent(this.openElements.current);
-
- this.treeAdapter.setNodeSourceCodeLocation(tmplContent, null);
- },
-
- _insertFakeRootElement() {
- orig._insertFakeRootElement.call(this);
- this.treeAdapter.setNodeSourceCodeLocation(this.openElements.current, null);
- },
-
- //Comments
- _appendCommentNode(token, parent) {
- orig._appendCommentNode.call(this, token, parent);
-
- const children = this.treeAdapter.getChildNodes(parent);
- const commentNode = children[children.length - 1];
-
- this.treeAdapter.setNodeSourceCodeLocation(commentNode, token.location);
- },
-
- //Text
- _findFosterParentingLocation() {
- //NOTE: store last foster parenting location, so we will be able to find inserted text
- //in case of foster parenting
- mxn.lastFosterParentingLocation = orig._findFosterParentingLocation.call(this);
-
- return mxn.lastFosterParentingLocation;
- },
-
- _insertCharacters(token) {
- orig._insertCharacters.call(this, token);
-
- const hasFosterParent = this._shouldFosterParentOnInsertion();
-
- const parent =
- (hasFosterParent && mxn.lastFosterParentingLocation.parent) ||
- this.openElements.currentTmplContent ||
- this.openElements.current;
-
- const siblings = this.treeAdapter.getChildNodes(parent);
-
- const textNodeIdx =
- hasFosterParent && mxn.lastFosterParentingLocation.beforeElement
- ? siblings.indexOf(mxn.lastFosterParentingLocation.beforeElement) - 1
- : siblings.length - 1;
-
- const textNode = siblings[textNodeIdx];
-
- //NOTE: if we have location assigned by another token, then just update end position
- const tnLoc = this.treeAdapter.getNodeSourceCodeLocation(textNode);
-
- if (tnLoc) {
- const { endLine, endCol, endOffset } = token.location;
- this.treeAdapter.updateNodeSourceCodeLocation(textNode, { endLine, endCol, endOffset });
- } else {
- this.treeAdapter.setNodeSourceCodeLocation(textNode, token.location);
- }
- }
- };
- }
-}
-
-module.exports = LocationInfoParserMixin;
diff --git a/packages/parse5/lib/extensions/location-info/tokenizer-mixin.js b/packages/parse5/lib/extensions/location-info/tokenizer-mixin.js
deleted file mode 100644
index 3c1ef5fbb196472e434dbb4950da1d62f12e2890..0000000000000000000000000000000000000000
--- a/packages/parse5/lib/extensions/location-info/tokenizer-mixin.js
+++ /dev/null
@@ -1,146 +0,0 @@
-'use strict';
-
-const Mixin = require('../../utils/mixin');
-const Tokenizer = require('../../tokenizer');
-const PositionTrackingPreprocessorMixin = require('../position-tracking/preprocessor-mixin');
-
-class LocationInfoTokenizerMixin extends Mixin {
- constructor(tokenizer) {
- super(tokenizer);
-
- this.tokenizer = tokenizer;
- this.posTracker = Mixin.install(tokenizer.preprocessor, PositionTrackingPreprocessorMixin);
- this.currentAttrLocation = null;
- this.ctLoc = null;
- }
-
- _getCurrentLocation() {
- return {
- startLine: this.posTracker.line,
- startCol: this.posTracker.col,
- startOffset: this.posTracker.offset,
- endLine: -1,
- endCol: -1,
- endOffset: -1
- };
- }
-
- _attachCurrentAttrLocationInfo() {
- this.currentAttrLocation.endLine = this.posTracker.line;
- this.currentAttrLocation.endCol = this.posTracker.col;
- this.currentAttrLocation.endOffset = this.posTracker.offset;
-
- const currentToken = this.tokenizer.currentToken;
- const currentAttr = this.tokenizer.currentAttr;
-
- if (!currentToken.location.attrs) {
- currentToken.location.attrs = Object.create(null);
- }
-
- currentToken.location.attrs[currentAttr.name] = this.currentAttrLocation;
- }
-
- _getOverriddenMethods(mxn, orig) {
- const methods = {
- _createStartTagToken() {
- orig._createStartTagToken.call(this);
- this.currentToken.location = mxn.ctLoc;
- },
-
- _createEndTagToken() {
- orig._createEndTagToken.call(this);
- this.currentToken.location = mxn.ctLoc;
- },
-
- _createCommentToken() {
- orig._createCommentToken.call(this);
- this.currentToken.location = mxn.ctLoc;
- },
-
- _createDoctypeToken(initialName) {
- orig._createDoctypeToken.call(this, initialName);
- this.currentToken.location = mxn.ctLoc;
- },
-
- _createCharacterToken(type, ch) {
- orig._createCharacterToken.call(this, type, ch);
- this.currentCharacterToken.location = mxn.ctLoc;
- },
-
- _createEOFToken() {
- orig._createEOFToken.call(this);
- this.currentToken.location = mxn._getCurrentLocation();
- },
-
- _createAttr(attrNameFirstCh) {
- orig._createAttr.call(this, attrNameFirstCh);
- mxn.currentAttrLocation = mxn._getCurrentLocation();
- },
-
- _leaveAttrName(toState) {
- orig._leaveAttrName.call(this, toState);
- mxn._attachCurrentAttrLocationInfo();
- },
-
- _leaveAttrValue(toState) {
- orig._leaveAttrValue.call(this, toState);
- mxn._attachCurrentAttrLocationInfo();
- },
-
- _emitCurrentToken() {
- const ctLoc = this.currentToken.location;
-
- //NOTE: if we have pending character token make it's end location equal to the
- //current token's start location.
- if (this.currentCharacterToken) {
- this.currentCharacterToken.location.endLine = ctLoc.startLine;
- this.currentCharacterToken.location.endCol = ctLoc.startCol;
- this.currentCharacterToken.location.endOffset = ctLoc.startOffset;
- }
-
- if (this.currentToken.type === Tokenizer.EOF_TOKEN) {
- ctLoc.endLine = ctLoc.startLine;
- ctLoc.endCol = ctLoc.startCol;
- ctLoc.endOffset = ctLoc.startOffset;
- } else {
- ctLoc.endLine = mxn.posTracker.line;
- ctLoc.endCol = mxn.posTracker.col + 1;
- ctLoc.endOffset = mxn.posTracker.offset + 1;
- }
-
- orig._emitCurrentToken.call(this);
- },
-
- _emitCurrentCharacterToken() {
- const ctLoc = this.currentCharacterToken && this.currentCharacterToken.location;
-
- //NOTE: if we have character token and it's location wasn't set in the _emitCurrentToken(),
- //then set it's location at the current preprocessor position.
- //We don't need to increment preprocessor position, since character token
- //emission is always forced by the start of the next character token here.
- //So, we already have advanced position.
- if (ctLoc && ctLoc.endOffset === -1) {
- ctLoc.endLine = mxn.posTracker.line;
- ctLoc.endCol = mxn.posTracker.col;
- ctLoc.endOffset = mxn.posTracker.offset;
- }
-
- orig._emitCurrentCharacterToken.call(this);
- }
- };
-
- //NOTE: patch initial states for each mode to obtain token start position
- Object.keys(Tokenizer.MODE).forEach(modeName => {
- const state = Tokenizer.MODE[modeName];
-
- methods[state] = function(cp) {
- mxn.ctLoc = mxn._getCurrentLocation();
- orig[state].call(this, cp);
- };
- });
-
- return methods;
- }
-}
-
-module.exports = LocationInfoTokenizerMixin;
diff --git a/packages/parse5/lib/extensions/position-tracking/preprocessor-mixin.js b/packages/parse5/lib/extensions/position-tracking/preprocessor-mixin.js
deleted file mode 100644
index 3a07d780176411a08b05bc4cf32853a01c855ce0..0000000000000000000000000000000000000000
--- a/packages/parse5/lib/extensions/position-tracking/preprocessor-mixin.js
+++ /dev/null
@@ -1,64 +0,0 @@
-'use strict';
-
-const Mixin = require('../../utils/mixin');
-
-class PositionTrackingPreprocessorMixin extends Mixin {
- constructor(preprocessor) {
- super(preprocessor);
-
- this.preprocessor = preprocessor;
- this.isEol = false;
- this.lineStartPos = 0;
- this.droppedBufferSize = 0;
-
- this.offset = 0;
- this.col = 0;
- this.line = 1;
- }
-
- _getOverriddenMethods(mxn, orig) {
- return {
- advance() {
- const pos = this.pos + 1;
- const ch = this.html[pos];
-
- //NOTE: LF should be in the last column of the line
- if (mxn.isEol) {
- mxn.isEol = false;
- mxn.line++;
- mxn.lineStartPos = pos;
- }
-
- if (ch === '\n' || (ch === '\r' && this.html[pos + 1] !== '\n')) {
- mxn.isEol = true;
- }
-
- mxn.col = pos - mxn.lineStartPos + 1;
- mxn.offset = mxn.droppedBufferSize + pos;
-
- return orig.advance.call(this);
- },
-
- retreat() {
- orig.retreat.call(this);
-
- mxn.isEol = false;
- mxn.col = this.pos - mxn.lineStartPos + 1;
- },
-
- dropParsedChunk() {
- const prevPos = this.pos;
-
- orig.dropParsedChunk.call(this);
-
- const reduction = prevPos - this.pos;
-
- mxn.lineStartPos -= reduction;
- mxn.droppedBufferSize += reduction;
- mxn.offset = mxn.droppedBufferSize + this.pos;
- }
- };
- }
-}
-
-module.exports = PositionTrackingPreprocessorMixin;
diff --git a/packages/parse5/lib/index.js b/packages/parse5/lib/index.js
deleted file mode 100644
index 09c8e339284144b6227f8580372169775a257537..0000000000000000000000000000000000000000
--- a/packages/parse5/lib/index.js
+++ /dev/null
@@ -1,29 +0,0 @@
-'use strict';
-
-const Parser = require('./parser');
-const Serializer = require('./serializer');
-
-// Shorthands
-exports.parse = function parse(html, options) {
- const parser = new Parser(options);
-
- return parser.parse(html);
-};
-
-exports.parseFragment = function parseFragment(fragmentContext, html, options) {
- if (typeof fragmentContext === 'string') {
- options = html;
- html = fragmentContext;
- fragmentContext = null;
- }
-
- const parser = new Parser(options);
-
- return parser.parseFragment(html, fragmentContext);
-};
-
-exports.serialize = function(node, options) {
- const serializer = new Serializer(node, options);
-
- return serializer.serialize();
-};
diff --git a/packages/parse5/lib/index.ts b/packages/parse5/lib/index.ts
new file mode 100644
index 0000000000000000000000000000000000000000..cb382d816799de5b4664a9c2e99032832109c5d0
--- /dev/null
+++ b/packages/parse5/lib/index.ts
@@ -0,0 +1,95 @@
+import { Parser, type ParserOptions } from './parser/index.js';
+
+import type { DefaultTreeAdapterMap } from './tree-adapters/default.js';
+import type { TreeAdapterTypeMap } from './tree-adapters/interface.js';
+
+export { type DefaultTreeAdapterMap, defaultTreeAdapter } from './tree-adapters/default.js';
+export type { TreeAdapter, TreeAdapterTypeMap } from './tree-adapters/interface.js';
+export { type ParserOptions, /** @internal */ Parser } from './parser/index.js';
+export { serialize, serializeOuter, type SerializerOptions } from './serializer/index.js';
+export type { ParserError } from './common/error-codes.js';
+
+/** @internal */
+export * as foreignContent from './common/foreign-content.js';
+/** @internal */
+export * as html from './common/html.js';
+/** @internal */
+export * as Token from './common/token.js';
+/** @internal */
+export { Tokenizer, type TokenizerOptions, TokenizerMode, type TokenHandler } from './tokenizer/index.js';
+
+// Shorthands
+
+/**
+ * Parses an HTML string.
+ *
+ * @param html Input HTML string.
+ * @param options Parsing options.
+ * @returns Document
+ *
+ * @example
+ *
+ * ```js
+ * const parse5 = require('parse5');
+ *
+ * const document = parse5.parse('<!DOCTYPE html><html><head></head><body>Hi there!</body></html>');
+ *
+ * console.log(document.childNodes[1].tagName); //> 'html'
+ *```
+ */
+export function parse<T extends TreeAdapterTypeMap = DefaultTreeAdapterMap>(
+    html: string,
+    options?: ParserOptions<T>
+): T['document'] {
+    return Parser.parse(html, options);
+}
+
+/**
+ * Parses an HTML fragment.
+ *
+ * @example
+ *
+ * ```js
+ * const parse5 = require('parse5');
+ *
+ * const documentFragment = parse5.parseFragment('<table></table>');
+ *
+ * console.log(documentFragment.childNodes[0].tagName); //> 'table'
+ *
+ * // Parses the html fragment in the context of the parsed <table> element.
+ * const trFragment = parse5.parseFragment(documentFragment.childNodes[0], '<tr><td>Shake it, baby</td></tr>');
+ *
+ * console.log(trFragment.childNodes[0].childNodes[0].tagName); //> 'td'
+ * ```
+ *
+ * @param fragmentContext Parsing context element. If specified, given fragment will be parsed as if it was set to the context element's `innerHTML` property.
+ * @param html Input HTML fragment string.
+ * @param options Parsing options.
+ * @returns DocumentFragment
+ */
+export function parseFragment<T extends TreeAdapterTypeMap = DefaultTreeAdapterMap>(
+    fragmentContext: T['parentNode'] | null,
+    html: string,
+    options: ParserOptions<T>
+): T['documentFragment'];
+export function parseFragment<T extends TreeAdapterTypeMap = DefaultTreeAdapterMap>(
+    html: string,
+    options?: ParserOptions<T>
+): T['documentFragment'];
+export function parseFragment<T extends TreeAdapterTypeMap = DefaultTreeAdapterMap>(
+    fragmentContext: T['parentNode'] | null | string,
+    html?: string | ParserOptions<T>,
+    options?: ParserOptions<T>
+): T['documentFragment'] {
+    if (typeof fragmentContext === 'string') { // called as (html, options): shift the arguments
+        options = html as ParserOptions<T>;
+        html = fragmentContext;
+        fragmentContext = null;
+    }
+
+    const parser = Parser.getFragmentParser(fragmentContext, options);
+
+    parser.tokenizer.write(html as string, true);
+
+    return parser.getFragment();
+}
diff --git a/packages/parse5/lib/parser/formatting-element-list.js b/packages/parse5/lib/parser/formatting-element-list.js
deleted file mode 100644
index 0e241dbfdb0104d435e2d48e9b7b840c023ab53d..0000000000000000000000000000000000000000
--- a/packages/parse5/lib/parser/formatting-element-list.js
+++ /dev/null
@@ -1,181 +0,0 @@
-'use strict';
-
-//Const
-const NOAH_ARK_CAPACITY = 3;
-
-//List of formatting elements
-class FormattingElementList {
- constructor(treeAdapter) {
- this.length = 0;
- this.entries = [];
- this.treeAdapter = treeAdapter;
- this.bookmark = null;
- }
-
- //Noah Ark's condition
- //OPTIMIZATION: at first we try to find possible candidates for exclusion using
- //lightweight heuristics without thorough attributes check.
- _getNoahArkConditionCandidates(newElement) {
- const candidates = [];
-
- if (this.length >= NOAH_ARK_CAPACITY) {
- const neAttrsLength = this.treeAdapter.getAttrList(newElement).length;
- const neTagName = this.treeAdapter.getTagName(newElement);
- const neNamespaceURI = this.treeAdapter.getNamespaceURI(newElement);
-
- for (let i = this.length - 1; i >= 0; i--) {
- const entry = this.entries[i];
-
- if (entry.type === FormattingElementList.MARKER_ENTRY) {
- break;
- }
-
- const element = entry.element;
- const elementAttrs = this.treeAdapter.getAttrList(element);
-
- const isCandidate =
- this.treeAdapter.getTagName(element) === neTagName &&
- this.treeAdapter.getNamespaceURI(element) === neNamespaceURI &&
- elementAttrs.length === neAttrsLength;
-
- if (isCandidate) {
- candidates.push({ idx: i, attrs: elementAttrs });
- }
- }
- }
-
- return candidates.length < NOAH_ARK_CAPACITY ? [] : candidates;
- }
-
- _ensureNoahArkCondition(newElement) {
- const candidates = this._getNoahArkConditionCandidates(newElement);
- let cLength = candidates.length;
-
- if (cLength) {
- const neAttrs = this.treeAdapter.getAttrList(newElement);
- const neAttrsLength = neAttrs.length;
- const neAttrsMap = Object.create(null);
-
- //NOTE: build attrs map for the new element so we can perform fast lookups
- for (let i = 0; i < neAttrsLength; i++) {
- const neAttr = neAttrs[i];
-
- neAttrsMap[neAttr.name] = neAttr.value;
- }
-
- for (let i = 0; i < neAttrsLength; i++) {
- for (let j = 0; j < cLength; j++) {
- const cAttr = candidates[j].attrs[i];
-
- if (neAttrsMap[cAttr.name] !== cAttr.value) {
- candidates.splice(j, 1);
- cLength--;
- }
-
- if (candidates.length < NOAH_ARK_CAPACITY) {
- return;
- }
- }
- }
-
- //NOTE: remove bottommost candidates until Noah's Ark condition will not be met
- for (let i = cLength - 1; i >= NOAH_ARK_CAPACITY - 1; i--) {
- this.entries.splice(candidates[i].idx, 1);
- this.length--;
- }
- }
- }
-
- //Mutations
- insertMarker() {
- this.entries.push({ type: FormattingElementList.MARKER_ENTRY });
- this.length++;
- }
-
- pushElement(element, token) {
- this._ensureNoahArkCondition(element);
-
- this.entries.push({
- type: FormattingElementList.ELEMENT_ENTRY,
- element: element,
- token: token
- });
-
- this.length++;
- }
-
- insertElementAfterBookmark(element, token) {
- let bookmarkIdx = this.length - 1;
-
- for (; bookmarkIdx >= 0; bookmarkIdx--) {
- if (this.entries[bookmarkIdx] === this.bookmark) {
- break;
- }
- }
-
- this.entries.splice(bookmarkIdx + 1, 0, {
- type: FormattingElementList.ELEMENT_ENTRY,
- element: element,
- token: token
- });
-
- this.length++;
- }
-
- removeEntry(entry) {
- for (let i = this.length - 1; i >= 0; i--) {
- if (this.entries[i] === entry) {
- this.entries.splice(i, 1);
- this.length--;
- break;
- }
- }
- }
-
- clearToLastMarker() {
- while (this.length) {
- const entry = this.entries.pop();
-
- this.length--;
-
- if (entry.type === FormattingElementList.MARKER_ENTRY) {
- break;
- }
- }
- }
-
- //Search
- getElementEntryInScopeWithTagName(tagName) {
- for (let i = this.length - 1; i >= 0; i--) {
- const entry = this.entries[i];
-
- if (entry.type === FormattingElementList.MARKER_ENTRY) {
- return null;
- }
-
- if (this.treeAdapter.getTagName(entry.element) === tagName) {
- return entry;
- }
- }
-
- return null;
- }
-
- getElementEntry(element) {
- for (let i = this.length - 1; i >= 0; i--) {
- const entry = this.entries[i];
-
- if (entry.type === FormattingElementList.ELEMENT_ENTRY && entry.element === element) {
- return entry;
- }
- }
-
- return null;
- }
-}
-
-//Entry types
-FormattingElementList.MARKER_ENTRY = 'MARKER_ENTRY';
-FormattingElementList.ELEMENT_ENTRY = 'ELEMENT_ENTRY';
-
-module.exports = FormattingElementList;
diff --git a/packages/parse5/test/formatting-element-list.test.js b/packages/parse5/lib/parser/formatting-element-list.test.ts
similarity index 45%
rename from packages/parse5/test/formatting-element-list.test.js
rename to packages/parse5/lib/parser/formatting-element-list.test.ts
index 6a3e683817bdb10eae989836f0849e9e950f0959..cd3f9af484971bb06fa99506eb8349c589c299fe 100644
--- a/packages/parse5/test/formatting-element-list.test.js
+++ b/packages/parse5/lib/parser/formatting-element-list.test.ts
@@ -1,84 +1,90 @@
-'use strict';
-
-const assert = require('assert');
-const HTML = require('../lib/common/html');
-const FormattingElementList = require('../lib/parser/formatting-element-list');
-const { generateTestsForEachTreeAdapter } = require('../../../test/utils/common');
-
-//Aliases
-const $ = HTML.TAG_NAMES;
-const NS = HTML.NAMESPACES;
+import * as assert from 'node:assert';
+import { TAG_NAMES as $, NS, getTagID } from '../common/html.js';
+import { type TagToken, TokenType } from '../common/token.js';
+import { FormattingElementList, EntryType } from './formatting-element-list.js';
+import { generateTestsForEachTreeAdapter } from 'parse5-test-utils/utils/common.js';
+
+function createToken(name: $): TagToken {
+ return {
+ type: TokenType.START_TAG,
+ tagName: name,
+ tagID: getTagID(name),
+ ackSelfClosing: false,
+ selfClosing: false,
+ attrs: [],
+ location: null,
+ };
+}
-generateTestsForEachTreeAdapter(module.exports, (_test, treeAdapter) => {
- _test['Insert marker'] = function() {
+generateTestsForEachTreeAdapter('FormattingElementList', (treeAdapter) => {
+ test('Insert marker', () => {
const list = new FormattingElementList(treeAdapter);
list.insertMarker();
- assert.strictEqual(list.length, 1);
- assert.strictEqual(list.entries[0].type, FormattingElementList.MARKER_ENTRY);
+ assert.strictEqual(list.entries.length, 1);
+ assert.strictEqual(list.entries[0].type, EntryType.Marker);
list.insertMarker();
- assert.strictEqual(list.length, 2);
- assert.strictEqual(list.entries[1].type, FormattingElementList.MARKER_ENTRY);
- };
+ assert.strictEqual(list.entries.length, 2);
+ assert.strictEqual(list.entries[0].type, EntryType.Marker);
+ });
- _test['Push element'] = function() {
+ test('Push element', () => {
const list = new FormattingElementList(treeAdapter);
- const element1Token = 'token1';
- const element2Token = 'token2';
+ const element1Token = createToken($.DIV);
+ const element2Token = createToken($.P);
const element1 = treeAdapter.createElement($.DIV, NS.HTML, []);
const element2 = treeAdapter.createElement($.P, NS.HTML, []);
list.pushElement(element1, element1Token);
- assert.strictEqual(list.length, 1);
- assert.strictEqual(list.entries[0].type, FormattingElementList.ELEMENT_ENTRY);
+ assert.strictEqual(list.entries.length, 1);
+ assert.strictEqual(list.entries[0].type, EntryType.Element as const);
assert.strictEqual(list.entries[0].element, element1);
assert.strictEqual(list.entries[0].token, element1Token);
list.pushElement(element2, element2Token);
- assert.strictEqual(list.length, 2);
- assert.strictEqual(list.entries[1].type, FormattingElementList.ELEMENT_ENTRY);
- assert.strictEqual(list.entries[1].element, element2);
- assert.strictEqual(list.entries[1].token, element2Token);
- };
+ assert.strictEqual(list.entries.length, 2);
+ assert.strictEqual(list.entries[0].type, EntryType.Element);
+ assert.strictEqual(list.entries[0].element, element2);
+ assert.strictEqual(list.entries[0].token, element2Token);
+ });
- _test['Insert element after bookmark'] = function() {
+ test('Insert element after bookmark', () => {
const list = new FormattingElementList(treeAdapter);
- const token = 'token1';
const element1 = treeAdapter.createElement($.DIV, NS.HTML, []);
const element2 = treeAdapter.createElement($.P, NS.HTML, []);
const element3 = treeAdapter.createElement($.SPAN, NS.HTML, []);
const element4 = treeAdapter.createElement($.TITLE, NS.HTML, []);
- list.pushElement(element1, token);
+ list.pushElement(element1, createToken($.DIV));
list.bookmark = list.entries[0];
- list.pushElement(element2, token);
- list.pushElement(element3, token);
+ list.pushElement(element2, createToken($.P));
+ list.pushElement(element3, createToken($.SPAN));
- list.insertElementAfterBookmark(element4, token);
+ list.insertElementAfterBookmark(element4, createToken($.TITLE));
- assert.strictEqual(list.length, 4);
- assert.strictEqual(list.entries[1].element, element4);
- };
+ assert.strictEqual(list.entries.length, 4);
+ expect(list.entries[2]).toHaveProperty('element', element4);
+ });
- _test['Push element - Noah Ark condition'] = function() {
+ test('Push element - Noah Ark condition', () => {
const list = new FormattingElementList(treeAdapter);
- const token1 = 'token1';
- const token2 = 'token2';
- const token3 = 'token3';
- const token4 = 'token4';
- const token5 = 'token5';
- const token6 = 'token6';
+ const token1 = createToken($.DIV);
+ const token2 = createToken($.DIV);
+ const token3 = createToken($.DIV);
+ const token4 = createToken($.DIV);
+ const token5 = createToken($.DIV);
+ const token6 = createToken($.DIV);
const element1 = treeAdapter.createElement($.DIV, NS.HTML, [
{ name: 'attr1', value: 'val1' },
- { name: 'attr2', value: 'val2' }
+ { name: 'attr2', value: 'val2' },
]);
const element2 = treeAdapter.createElement($.DIV, NS.HTML, [
{ name: 'attr1', value: 'val1' },
- { name: 'attr2', value: 'someOtherValue' }
+ { name: 'attr2', value: 'someOtherValue' },
]);
list.pushElement(element1, token1);
@@ -86,44 +92,44 @@ generateTestsForEachTreeAdapter(module.exports, (_test, treeAdapter) => {
list.pushElement(element2, token3);
list.pushElement(element1, token4);
- assert.strictEqual(list.length, 4);
- assert.strictEqual(list.entries[0].token, token1);
- assert.strictEqual(list.entries[1].token, token2);
- assert.strictEqual(list.entries[2].token, token3);
- assert.strictEqual(list.entries[3].token, token4);
+ assert.strictEqual(list.entries.length, 4);
+ expect(list.entries[3]).toHaveProperty('token', token1);
+ expect(list.entries[2]).toHaveProperty('token', token2);
+ expect(list.entries[1]).toHaveProperty('token', token3);
+ expect(list.entries[0]).toHaveProperty('token', token4);
list.pushElement(element1, token5);
- assert.strictEqual(list.length, 4);
- assert.strictEqual(list.entries[0].token, token2);
- assert.strictEqual(list.entries[1].token, token3);
- assert.strictEqual(list.entries[2].token, token4);
- assert.strictEqual(list.entries[3].token, token5);
+ assert.strictEqual(list.entries.length, 4);
+ expect(list.entries[3]).toHaveProperty('token', token2);
+ expect(list.entries[2]).toHaveProperty('token', token3);
+ expect(list.entries[1]).toHaveProperty('token', token4);
+ expect(list.entries[0]).toHaveProperty('token', token5);
list.insertMarker();
list.pushElement(element1, token6);
- assert.strictEqual(list.length, 6);
- assert.strictEqual(list.entries[0].token, token2);
- assert.strictEqual(list.entries[1].token, token3);
- assert.strictEqual(list.entries[2].token, token4);
- assert.strictEqual(list.entries[3].token, token5);
- assert.strictEqual(list.entries[4].type, FormattingElementList.MARKER_ENTRY);
- assert.strictEqual(list.entries[5].token, token6);
- };
+ assert.strictEqual(list.entries.length, 6);
+ expect(list.entries[5]).toHaveProperty('token', token2);
+ expect(list.entries[4]).toHaveProperty('token', token3);
+ expect(list.entries[3]).toHaveProperty('token', token4);
+ expect(list.entries[2]).toHaveProperty('token', token5);
+ expect(list.entries[1]).toHaveProperty('type', EntryType.Marker);
+ expect(list.entries[0]).toHaveProperty('token', token6);
+ });
- _test['Clear to the last marker'] = function() {
+ test('Clear to the last marker', () => {
const list = new FormattingElementList(treeAdapter);
- const token = 'token';
+ const token = createToken($.DIV);
const element1 = treeAdapter.createElement($.DIV, NS.HTML, [
{ name: 'attr1', value: 'val1' },
- { name: 'attr2', value: 'val2' }
+ { name: 'attr2', value: 'val2' },
]);
const element2 = treeAdapter.createElement($.DIV, NS.HTML, [
{ name: 'attr1', value: 'val1' },
- { name: 'attr2', value: 'someOtherValue' }
+ { name: 'attr2', value: 'someOtherValue' },
]);
list.pushElement(element1, token);
@@ -135,57 +141,57 @@ generateTestsForEachTreeAdapter(module.exports, (_test, treeAdapter) => {
list.clearToLastMarker();
- assert.strictEqual(list.length, 2);
- };
+ assert.strictEqual(list.entries.length, 2);
+ });
- _test['Remove entry'] = function() {
+ test('Remove entry', () => {
const list = new FormattingElementList(treeAdapter);
- const token = 'token';
+ const token = createToken($.DIV);
const element1 = treeAdapter.createElement($.DIV, NS.HTML, [
{ name: 'attr1', value: 'val1' },
- { name: 'attr2', value: 'val2' }
+ { name: 'attr2', value: 'val2' },
]);
const element2 = treeAdapter.createElement($.DIV, NS.HTML, [
{ name: 'attr1', value: 'val1' },
- { name: 'attr2', value: 'someOtherValue' }
+ { name: 'attr2', value: 'someOtherValue' },
]);
list.pushElement(element1, token);
list.pushElement(element2, token);
list.pushElement(element2, token);
- list.removeEntry(list.entries[0]);
+ list.removeEntry(list.entries[2]);
- assert.strictEqual(list.length, 2);
+ assert.strictEqual(list.entries.length, 2);
- for (let i = list.length - 1; i >= 0; i--) {
- assert.notStrictEqual(list.entries[i].element, element1);
+ for (let i = 0; i < list.entries.length; i++) {
+ expect(list.entries[i]).not.toHaveProperty('element', element1);
}
- };
+ });
- _test['Get entry in scope with given tag name'] = function() {
+ test('Get entry in scope with given tag name', () => {
const list = new FormattingElementList(treeAdapter);
- const token = 'token';
+ const token = createToken($.DIV);
const element = treeAdapter.createElement($.DIV, NS.HTML, []);
assert.ok(!list.getElementEntryInScopeWithTagName($.DIV));
list.pushElement(element, token);
list.pushElement(element, token);
- assert.strictEqual(list.getElementEntryInScopeWithTagName($.DIV), list.entries[1]);
+ assert.strictEqual(list.getElementEntryInScopeWithTagName($.DIV), list.entries[0]);
list.insertMarker();
assert.ok(!list.getElementEntryInScopeWithTagName($.DIV));
list.pushElement(element, token);
- assert.strictEqual(list.getElementEntryInScopeWithTagName($.DIV), list.entries[3]);
- };
+ assert.strictEqual(list.getElementEntryInScopeWithTagName($.DIV), list.entries[0]);
+ });
- _test['Get element entry'] = function() {
+ test('Get element entry', () => {
const list = new FormattingElementList(treeAdapter);
- const token = 'token';
+ const token = createToken($.DIV);
const element1 = treeAdapter.createElement($.DIV, NS.HTML, []);
const element2 = treeAdapter.createElement($.A, NS.HTML, []);
@@ -196,8 +202,9 @@ generateTestsForEachTreeAdapter(module.exports, (_test, treeAdapter) => {
const entry = list.getElementEntry(element1);
- assert.strictEqual(entry.type, FormattingElementList.ELEMENT_ENTRY);
+ assert.ok(entry);
+ assert.strictEqual(entry.type, EntryType.Element);
assert.strictEqual(entry.token, token);
assert.strictEqual(entry.element, element1);
- };
+ });
});
diff --git a/packages/parse5/lib/parser/formatting-element-list.ts b/packages/parse5/lib/parser/formatting-element-list.ts
new file mode 100644
index 0000000000000000000000000000000000000000..f59b7fed60ec03f44e4ae63b2243a5a90d3a4e67
--- /dev/null
+++ b/packages/parse5/lib/parser/formatting-element-list.ts
@@ -0,0 +1,194 @@
+import type { Attribute, TagToken } from '../common/token.js';
+import type { TreeAdapter, TreeAdapterTypeMap } from '../tree-adapters/interface';
+
+//Const
+//Number of matching entries at which an existing entry is evicted before a push.
+const NOAH_ARK_CAPACITY = 3;
+
+/** Discriminant for the two kinds of entries stored in the list. */
+export enum EntryType {
+    Marker,
+    Element,
+}
+
+/** Boundary entry; carries no element or token. */
+interface MarkerEntry {
+    type: EntryType.Marker;
+}
+
+/** An element paired with the tag token it was pushed with. */
+export interface ElementEntry<T extends TreeAdapterTypeMap> {
+    type: EntryType.Element;
+    element: T['element'];
+    token: TagToken;
+}
+
+export type Entry<T extends TreeAdapterTypeMap> = MarkerEntry | ElementEntry<T>;
+
+//NOTE: a single shared object; `clearToLastMarker` finds markers via `indexOf(MARKER)`.
+const MARKER: MarkerEntry = { type: EntryType.Marker };
+
+/**
+ * List of formatting elements.
+ *
+ * New entries from `pushElement` and `insertMarker` are `unshift`ed, so they are
+ * placed at the front (index 0) of `entries`.
+ */
+export class FormattingElementList<T extends TreeAdapterTypeMap> {
+    entries: Entry<T>[] = [];
+    bookmark: Entry<T> | null = null;
+
+    constructor(private treeAdapter: TreeAdapter<T>) {}
+
+    //Noah's Ark condition
+    //OPTIMIZATION: at first we try to find possible candidates for exclusion using
+    //lightweight heuristics without thorough attributes check.
+    /**
+     * Collects the entries in front of the first marker whose tag name,
+     * namespace and attribute count equal those of `newElement`.
+     * Attribute names and values are not compared here.
+     *
+     * @param newElement Element about to be pushed.
+     * @param neAttrs Attribute list of `newElement`.
+     * @returns Candidates in list order, each with its index in `entries`.
+     */
+    private _getNoahArkConditionCandidates(
+        newElement: T['element'],
+        neAttrs: Attribute[]
+    ): { idx: number; attrs: Attribute[] }[] {
+        const candidates = [];
+
+        const neAttrsLength = neAttrs.length;
+        const neTagName = this.treeAdapter.getTagName(newElement);
+        const neNamespaceURI = this.treeAdapter.getNamespaceURI(newElement);
+
+        for (let i = 0; i < this.entries.length; i++) {
+            const entry = this.entries[i];
+
+            //NOTE: only entries in front of the first marker are considered
+            if (entry.type === EntryType.Marker) {
+                break;
+            }
+
+            const { element } = entry;
+
+            if (
+                this.treeAdapter.getTagName(element) === neTagName &&
+                this.treeAdapter.getNamespaceURI(element) === neNamespaceURI
+            ) {
+                const elementAttrs = this.treeAdapter.getAttrList(element);
+
+                if (elementAttrs.length === neAttrsLength) {
+                    candidates.push({ idx: i, attrs: elementAttrs });
+                }
+            }
+        }
+
+        return candidates;
+    }
+
+    /**
+     * Enforces the Noah's Ark condition before `newElement` is pushed: of the
+     * candidates whose attributes all equal those of `newElement`, every one
+     * from the `NOAH_ARK_CAPACITY`-th onward is spliced out of `entries`.
+     */
+    private _ensureNoahArkCondition(newElement: T['element']): void {
+        if (this.entries.length < NOAH_ARK_CAPACITY) return;
+
+        const neAttrs = this.treeAdapter.getAttrList(newElement);
+        const candidates = this._getNoahArkConditionCandidates(newElement, neAttrs);
+
+        if (candidates.length < NOAH_ARK_CAPACITY) return;
+
+        //NOTE: build attrs map for the new element, so we can perform fast lookups
+        const neAttrsMap = new Map(neAttrs.map((neAttr: Attribute) => [neAttr.name, neAttr.value]));
+        let validCandidates = 0;
+
+        //NOTE: candidates follow `entries` order (front first), so the matches nearest
+        //the front are kept and those from the capacity-th onward are removed
+        for (let i = 0; i < candidates.length; i++) {
+            const candidate = candidates[i];
+
+            // We know that `candidate.attrs.length === neAttrs.length`
+            if (candidate.attrs.every((cAttr) => neAttrsMap.get(cAttr.name) === cAttr.value)) {
+                validCandidates += 1;
+
+                if (validCandidates >= NOAH_ARK_CAPACITY) {
+                    this.entries.splice(candidate.idx, 1);
+                }
+            }
+        }
+    }
+
+    //Mutations
+    /** Adds a marker at the front of the list. */
+    insertMarker(): void {
+        this.entries.unshift(MARKER);
+    }
+
+    /** Adds an element entry at the front of the list after enforcing the Noah's Ark condition. */
+    pushElement(element: T['element'], token: TagToken): void {
+        this._ensureNoahArkCondition(element);
+
+        this.entries.unshift({
+            type: EntryType.Element,
+            element,
+            token,
+        });
+    }
+
+    /**
+     * Inserts an element entry at the index currently held by `bookmark`.
+     * NOTE(review): `bookmark` is assumed to be set and present in `entries`;
+     * otherwise `indexOf` yields -1 and the entry lands before the last item.
+     */
+    insertElementAfterBookmark(element: T['element'], token: TagToken): void {
+        const bookmarkIdx = this.entries.indexOf(this.bookmark!);
+
+        this.entries.splice(bookmarkIdx, 0, {
+            type: EntryType.Element,
+            element,
+            token,
+        });
+    }
+
+    /** Removes `entry` (matched by reference) if it is in the list. */
+    removeEntry(entry: Entry<T>): void {
+        const entryIndex = this.entries.indexOf(entry);
+
+        if (entryIndex >= 0) {
+            this.entries.splice(entryIndex, 1);
+        }
+    }
+
+    /** Drops every entry up to and including the first marker, or all entries if there is none. */
+    clearToLastMarker(): void {
+        const markerIdx = this.entries.indexOf(MARKER);
+
+        if (markerIdx >= 0) {
+            this.entries.splice(0, markerIdx + 1);
+        } else {
+            this.entries.length = 0;
+        }
+    }
+
+    //Search
+    /**
+     * Returns the first element entry with the given tag name that appears
+     * before any marker, or `null` if a marker (or the end) is reached first.
+     */
+    getElementEntryInScopeWithTagName(tagName: string): ElementEntry<T> | null {
+        const entry = this.entries.find(
+            (entry) => entry.type === EntryType.Marker || this.treeAdapter.getTagName(entry.element) === tagName
+        );
+
+        return entry && entry.type === EntryType.Element ? entry : null;
+    }
+
+    /** Returns the entry holding exactly `element`, or `undefined` if absent. */
+    getElementEntry(element: T['element']): ElementEntry<T> | undefined {
+        return this.entries.find(
+            (entry): entry is ElementEntry<T> => entry.type === EntryType.Element && entry.element === element
+        );
+    }
+}
diff --git a/packages/parse5/lib/parser/index.js b/packages/parse5/lib/parser/index.js
deleted file mode 100644
index 8760b44b10519d5ce387ad3bd97b611a659d00e1..0000000000000000000000000000000000000000
--- a/packages/parse5/lib/parser/index.js
+++ /dev/null
@@ -1,3034 +0,0 @@
-'use strict';
-
-const Tokenizer = require('../tokenizer');
-const OpenElementStack = require('./open-element-stack');
-const FormattingElementList = require('./formatting-element-list');
-const LocationInfoParserMixin = require('../extensions/location-info/parser-mixin');
-const ErrorReportingParserMixin = require('../extensions/error-reporting/parser-mixin');
-const Mixin = require('../utils/mixin');
-const defaultTreeAdapter = require('../tree-adapters/default');
-const mergeOptions = require('../utils/merge-options');
-const doctype = require('../common/doctype');
-const foreignContent = require('../common/foreign-content');
-const ERR = require('../common/error-codes');
-const unicode = require('../common/unicode');
-const HTML = require('../common/html');
-
-//Aliases
-const $ = HTML.TAG_NAMES;
-const NS = HTML.NAMESPACES;
-const ATTRS = HTML.ATTRS;
-
-const DEFAULT_OPTIONS = {
- scriptingEnabled: true,
- sourceCodeLocationInfo: false,
- onParseError: null,
- treeAdapter: defaultTreeAdapter
-};
-
-//Misc constants
-const HIDDEN_INPUT_TYPE = 'hidden';
-
-//Adoption agency loops iteration count
-const AA_OUTER_LOOP_ITER = 8;
-const AA_INNER_LOOP_ITER = 3;
-
-//Insertion modes
-const INITIAL_MODE = 'INITIAL_MODE';
-const BEFORE_HTML_MODE = 'BEFORE_HTML_MODE';
-const BEFORE_HEAD_MODE = 'BEFORE_HEAD_MODE';
-const IN_HEAD_MODE = 'IN_HEAD_MODE';
-const IN_HEAD_NO_SCRIPT_MODE = 'IN_HEAD_NO_SCRIPT_MODE';
-const AFTER_HEAD_MODE = 'AFTER_HEAD_MODE';
-const IN_BODY_MODE = 'IN_BODY_MODE';
-const TEXT_MODE = 'TEXT_MODE';
-const IN_TABLE_MODE = 'IN_TABLE_MODE';
-const IN_TABLE_TEXT_MODE = 'IN_TABLE_TEXT_MODE';
-const IN_CAPTION_MODE = 'IN_CAPTION_MODE';
-const IN_COLUMN_GROUP_MODE = 'IN_COLUMN_GROUP_MODE';
-const IN_TABLE_BODY_MODE = 'IN_TABLE_BODY_MODE';
-const IN_ROW_MODE = 'IN_ROW_MODE';
-const IN_CELL_MODE = 'IN_CELL_MODE';
-const IN_SELECT_MODE = 'IN_SELECT_MODE';
-const IN_SELECT_IN_TABLE_MODE = 'IN_SELECT_IN_TABLE_MODE';
-const IN_TEMPLATE_MODE = 'IN_TEMPLATE_MODE';
-const AFTER_BODY_MODE = 'AFTER_BODY_MODE';
-const IN_FRAMESET_MODE = 'IN_FRAMESET_MODE';
-const AFTER_FRAMESET_MODE = 'AFTER_FRAMESET_MODE';
-const AFTER_AFTER_BODY_MODE = 'AFTER_AFTER_BODY_MODE';
-const AFTER_AFTER_FRAMESET_MODE = 'AFTER_AFTER_FRAMESET_MODE';
-
-//Insertion mode reset map
-const INSERTION_MODE_RESET_MAP = {
- [$.TR]: IN_ROW_MODE,
- [$.TBODY]: IN_TABLE_BODY_MODE,
- [$.THEAD]: IN_TABLE_BODY_MODE,
- [$.TFOOT]: IN_TABLE_BODY_MODE,
- [$.CAPTION]: IN_CAPTION_MODE,
- [$.COLGROUP]: IN_COLUMN_GROUP_MODE,
- [$.TABLE]: IN_TABLE_MODE,
- [$.BODY]: IN_BODY_MODE,
- [$.FRAMESET]: IN_FRAMESET_MODE
-};
-
-//Template insertion mode switch map
-const TEMPLATE_INSERTION_MODE_SWITCH_MAP = {
- [$.CAPTION]: IN_TABLE_MODE,
- [$.COLGROUP]: IN_TABLE_MODE,
- [$.TBODY]: IN_TABLE_MODE,
- [$.TFOOT]: IN_TABLE_MODE,
- [$.THEAD]: IN_TABLE_MODE,
- [$.COL]: IN_COLUMN_GROUP_MODE,
- [$.TR]: IN_TABLE_BODY_MODE,
- [$.TD]: IN_ROW_MODE,
- [$.TH]: IN_ROW_MODE
-};
-
-//Token handlers map for insertion modes
-const TOKEN_HANDLERS = {
- [INITIAL_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: tokenInInitialMode,
- [Tokenizer.NULL_CHARACTER_TOKEN]: tokenInInitialMode,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: ignoreToken,
- [Tokenizer.COMMENT_TOKEN]: appendComment,
- [Tokenizer.DOCTYPE_TOKEN]: doctypeInInitialMode,
- [Tokenizer.START_TAG_TOKEN]: tokenInInitialMode,
- [Tokenizer.END_TAG_TOKEN]: tokenInInitialMode,
- [Tokenizer.EOF_TOKEN]: tokenInInitialMode
- },
- [BEFORE_HTML_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: tokenBeforeHtml,
- [Tokenizer.NULL_CHARACTER_TOKEN]: tokenBeforeHtml,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: ignoreToken,
- [Tokenizer.COMMENT_TOKEN]: appendComment,
- [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
- [Tokenizer.START_TAG_TOKEN]: startTagBeforeHtml,
- [Tokenizer.END_TAG_TOKEN]: endTagBeforeHtml,
- [Tokenizer.EOF_TOKEN]: tokenBeforeHtml
- },
- [BEFORE_HEAD_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: tokenBeforeHead,
- [Tokenizer.NULL_CHARACTER_TOKEN]: tokenBeforeHead,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: ignoreToken,
- [Tokenizer.COMMENT_TOKEN]: appendComment,
- [Tokenizer.DOCTYPE_TOKEN]: misplacedDoctype,
- [Tokenizer.START_TAG_TOKEN]: startTagBeforeHead,
- [Tokenizer.END_TAG_TOKEN]: endTagBeforeHead,
- [Tokenizer.EOF_TOKEN]: tokenBeforeHead
- },
- [IN_HEAD_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: tokenInHead,
- [Tokenizer.NULL_CHARACTER_TOKEN]: tokenInHead,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters,
- [Tokenizer.COMMENT_TOKEN]: appendComment,
- [Tokenizer.DOCTYPE_TOKEN]: misplacedDoctype,
- [Tokenizer.START_TAG_TOKEN]: startTagInHead,
- [Tokenizer.END_TAG_TOKEN]: endTagInHead,
- [Tokenizer.EOF_TOKEN]: tokenInHead
- },
- [IN_HEAD_NO_SCRIPT_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: tokenInHeadNoScript,
- [Tokenizer.NULL_CHARACTER_TOKEN]: tokenInHeadNoScript,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters,
- [Tokenizer.COMMENT_TOKEN]: appendComment,
- [Tokenizer.DOCTYPE_TOKEN]: misplacedDoctype,
- [Tokenizer.START_TAG_TOKEN]: startTagInHeadNoScript,
- [Tokenizer.END_TAG_TOKEN]: endTagInHeadNoScript,
- [Tokenizer.EOF_TOKEN]: tokenInHeadNoScript
- },
- [AFTER_HEAD_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: tokenAfterHead,
- [Tokenizer.NULL_CHARACTER_TOKEN]: tokenAfterHead,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters,
- [Tokenizer.COMMENT_TOKEN]: appendComment,
- [Tokenizer.DOCTYPE_TOKEN]: misplacedDoctype,
- [Tokenizer.START_TAG_TOKEN]: startTagAfterHead,
- [Tokenizer.END_TAG_TOKEN]: endTagAfterHead,
- [Tokenizer.EOF_TOKEN]: tokenAfterHead
- },
- [IN_BODY_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: characterInBody,
- [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody,
- [Tokenizer.COMMENT_TOKEN]: appendComment,
- [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
- [Tokenizer.START_TAG_TOKEN]: startTagInBody,
- [Tokenizer.END_TAG_TOKEN]: endTagInBody,
- [Tokenizer.EOF_TOKEN]: eofInBody
- },
- [TEXT_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: insertCharacters,
- [Tokenizer.NULL_CHARACTER_TOKEN]: insertCharacters,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters,
- [Tokenizer.COMMENT_TOKEN]: ignoreToken,
- [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
- [Tokenizer.START_TAG_TOKEN]: ignoreToken,
- [Tokenizer.END_TAG_TOKEN]: endTagInText,
- [Tokenizer.EOF_TOKEN]: eofInText
- },
- [IN_TABLE_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: characterInTable,
- [Tokenizer.NULL_CHARACTER_TOKEN]: characterInTable,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: characterInTable,
- [Tokenizer.COMMENT_TOKEN]: appendComment,
- [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
- [Tokenizer.START_TAG_TOKEN]: startTagInTable,
- [Tokenizer.END_TAG_TOKEN]: endTagInTable,
- [Tokenizer.EOF_TOKEN]: eofInBody
- },
- [IN_TABLE_TEXT_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: characterInTableText,
- [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInTableText,
- [Tokenizer.COMMENT_TOKEN]: tokenInTableText,
- [Tokenizer.DOCTYPE_TOKEN]: tokenInTableText,
- [Tokenizer.START_TAG_TOKEN]: tokenInTableText,
- [Tokenizer.END_TAG_TOKEN]: tokenInTableText,
- [Tokenizer.EOF_TOKEN]: tokenInTableText
- },
- [IN_CAPTION_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: characterInBody,
- [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody,
- [Tokenizer.COMMENT_TOKEN]: appendComment,
- [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
- [Tokenizer.START_TAG_TOKEN]: startTagInCaption,
- [Tokenizer.END_TAG_TOKEN]: endTagInCaption,
- [Tokenizer.EOF_TOKEN]: eofInBody
- },
- [IN_COLUMN_GROUP_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: tokenInColumnGroup,
- [Tokenizer.NULL_CHARACTER_TOKEN]: tokenInColumnGroup,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters,
- [Tokenizer.COMMENT_TOKEN]: appendComment,
- [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
- [Tokenizer.START_TAG_TOKEN]: startTagInColumnGroup,
- [Tokenizer.END_TAG_TOKEN]: endTagInColumnGroup,
- [Tokenizer.EOF_TOKEN]: eofInBody
- },
- [IN_TABLE_BODY_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: characterInTable,
- [Tokenizer.NULL_CHARACTER_TOKEN]: characterInTable,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: characterInTable,
- [Tokenizer.COMMENT_TOKEN]: appendComment,
- [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
- [Tokenizer.START_TAG_TOKEN]: startTagInTableBody,
- [Tokenizer.END_TAG_TOKEN]: endTagInTableBody,
- [Tokenizer.EOF_TOKEN]: eofInBody
- },
- [IN_ROW_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: characterInTable,
- [Tokenizer.NULL_CHARACTER_TOKEN]: characterInTable,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: characterInTable,
- [Tokenizer.COMMENT_TOKEN]: appendComment,
- [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
- [Tokenizer.START_TAG_TOKEN]: startTagInRow,
- [Tokenizer.END_TAG_TOKEN]: endTagInRow,
- [Tokenizer.EOF_TOKEN]: eofInBody
- },
- [IN_CELL_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: characterInBody,
- [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody,
- [Tokenizer.COMMENT_TOKEN]: appendComment,
- [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
- [Tokenizer.START_TAG_TOKEN]: startTagInCell,
- [Tokenizer.END_TAG_TOKEN]: endTagInCell,
- [Tokenizer.EOF_TOKEN]: eofInBody
- },
- [IN_SELECT_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: insertCharacters,
- [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters,
- [Tokenizer.COMMENT_TOKEN]: appendComment,
- [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
- [Tokenizer.START_TAG_TOKEN]: startTagInSelect,
- [Tokenizer.END_TAG_TOKEN]: endTagInSelect,
- [Tokenizer.EOF_TOKEN]: eofInBody
- },
- [IN_SELECT_IN_TABLE_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: insertCharacters,
- [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters,
- [Tokenizer.COMMENT_TOKEN]: appendComment,
- [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
- [Tokenizer.START_TAG_TOKEN]: startTagInSelectInTable,
- [Tokenizer.END_TAG_TOKEN]: endTagInSelectInTable,
- [Tokenizer.EOF_TOKEN]: eofInBody
- },
- [IN_TEMPLATE_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: characterInBody,
- [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody,
- [Tokenizer.COMMENT_TOKEN]: appendComment,
- [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
- [Tokenizer.START_TAG_TOKEN]: startTagInTemplate,
- [Tokenizer.END_TAG_TOKEN]: endTagInTemplate,
- [Tokenizer.EOF_TOKEN]: eofInTemplate
- },
- [AFTER_BODY_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: tokenAfterBody,
- [Tokenizer.NULL_CHARACTER_TOKEN]: tokenAfterBody,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody,
- [Tokenizer.COMMENT_TOKEN]: appendCommentToRootHtmlElement,
- [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
- [Tokenizer.START_TAG_TOKEN]: startTagAfterBody,
- [Tokenizer.END_TAG_TOKEN]: endTagAfterBody,
- [Tokenizer.EOF_TOKEN]: stopParsing
- },
- [IN_FRAMESET_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: ignoreToken,
- [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters,
- [Tokenizer.COMMENT_TOKEN]: appendComment,
- [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
- [Tokenizer.START_TAG_TOKEN]: startTagInFrameset,
- [Tokenizer.END_TAG_TOKEN]: endTagInFrameset,
- [Tokenizer.EOF_TOKEN]: stopParsing
- },
- [AFTER_FRAMESET_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: ignoreToken,
- [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters,
- [Tokenizer.COMMENT_TOKEN]: appendComment,
- [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
- [Tokenizer.START_TAG_TOKEN]: startTagAfterFrameset,
- [Tokenizer.END_TAG_TOKEN]: endTagAfterFrameset,
- [Tokenizer.EOF_TOKEN]: stopParsing
- },
- [AFTER_AFTER_BODY_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: tokenAfterAfterBody,
- [Tokenizer.NULL_CHARACTER_TOKEN]: tokenAfterAfterBody,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody,
- [Tokenizer.COMMENT_TOKEN]: appendCommentToDocument,
- [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
- [Tokenizer.START_TAG_TOKEN]: startTagAfterAfterBody,
- [Tokenizer.END_TAG_TOKEN]: tokenAfterAfterBody,
- [Tokenizer.EOF_TOKEN]: stopParsing
- },
- [AFTER_AFTER_FRAMESET_MODE]: {
- [Tokenizer.CHARACTER_TOKEN]: ignoreToken,
- [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken,
- [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody,
- [Tokenizer.COMMENT_TOKEN]: appendCommentToDocument,
- [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
- [Tokenizer.START_TAG_TOKEN]: startTagAfterAfterFrameset,
- [Tokenizer.END_TAG_TOKEN]: ignoreToken,
- [Tokenizer.EOF_TOKEN]: stopParsing
- }
-};
-
-//Parser
-class Parser {
- constructor(options) {
- this.options = mergeOptions(DEFAULT_OPTIONS, options);
-
- this.treeAdapter = this.options.treeAdapter;
- this.pendingScript = null;
- this.nodeInfo = {};
-
- if(this.options.componentValidator){
- this.validator = this.options.componentValidator;
- }
-
- if(this.options.compileResult){
- this.compileResult = this.options.compileResult;
- }
-
- if (this.options.sourceCodeLocationInfo) {
- Mixin.install(this, LocationInfoParserMixin);
- }
-
- if (this.options.onParseError) {
- Mixin.install(this, ErrorReportingParserMixin, { onParseError: this.options.onParseError });
- }
- }
-
- // API
- parse(html) {
- const document = this.treeAdapter.createDocument();
-
- this._bootstrap(document, null);
- this.tokenizer.write(html, true);
- this._runParsingLoop(null);
-
- return document;
- }
-
- parseFragment(html, fragmentContext) {
- //NOTE: use element as a fragment context if context element was not provided,
- //so we will parse in "forgiving" manner
- if (!fragmentContext) {
- fragmentContext = this.treeAdapter.createElement($.TEMPLATE, NS.HTML, []);
- }
-
- //NOTE: create fake element which will be used as 'document' for fragment parsing.
- //This is important for jsdom there 'document' can't be recreated, therefore
- //fragment parsing causes messing of the main `document`.
- const documentMock = this.treeAdapter.createElement('documentmock', NS.HTML, []);
-
- this._bootstrap(documentMock, fragmentContext);
-
- if (this.treeAdapter.getTagName(fragmentContext) === $.TEMPLATE) {
- this._pushTmplInsertionMode(IN_TEMPLATE_MODE);
- }
-
- this._initTokenizerForFragmentParsing();
- this._insertFakeRootElement();
- this._resetInsertionMode();
- this._findFormInFragmentContext();
- this.tokenizer.write(html, true);
- this._runParsingLoop(null);
-
- const rootElement = this.treeAdapter.getFirstChild(documentMock);
- const fragment = this.treeAdapter.createDocumentFragment();
-
- this._adoptNodes(rootElement, fragment);
-
- return fragment;
- }
-
- //Bootstrap parser
- _bootstrap(document, fragmentContext) {
- this.tokenizer = new Tokenizer(this.options);
-
- this.stopped = false;
-
- this.insertionMode = INITIAL_MODE;
- this.originalInsertionMode = '';
-
- this.document = document;
- this.fragmentContext = fragmentContext;
-
- this.headElement = null;
- this.formElement = null;
-
- this.openElements = new OpenElementStack(this.document, this.treeAdapter);
- this.activeFormattingElements = new FormattingElementList(this.treeAdapter);
-
- this.tmplInsertionModeStack = [];
- this.tmplInsertionModeStackTop = -1;
- this.currentTmplInsertionMode = null;
-
- this.pendingCharacterTokens = [];
- this.hasNonWhitespacePendingCharacterToken = false;
-
- this.framesetOk = true;
- this.skipNextNewLine = false;
- this.fosterParentingEnabled = false;
- }
-
- //Errors
- _err() {
- // NOTE: err reporting is noop by default. Enabled by mixin.
- }
-
- //Parsing loop
- _runParsingLoop(scriptHandler) {
- let lastToken = {};
- while (!this.stopped) {
- this._setupTokenizerCDATAMode();
-
- const token = this.tokenizer.getNextToken();
-
- if (token.type === Tokenizer.HIBERNATION_TOKEN) {
- break;
- }
- if (token.type !== Tokenizer.EOF_TOKEN && token.type !== Tokenizer.WHITESPACE_CHARACTER_TOKEN) {
- lastToken =token;
- }
- checkselfClosingNode(this, token);
- if (this.skipNextNewLine) {
- this.skipNextNewLine = false;
-
- if (token.type === Tokenizer.WHITESPACE_CHARACTER_TOKEN && token.chars[0] === '\n') {
- if (token.chars.length === 1) {
- continue;
- }
-
- token.chars = token.chars.substr(1);
- }
- }
-
- this._processInputToken(token);
-
- if (scriptHandler && this.pendingScript) {
- break;
- }
- }
- checkInvalid(this, lastToken);
- }
-
- runParsingLoopForCurrentChunk(writeCallback, scriptHandler) {
- this._runParsingLoop(scriptHandler);
-
- if (scriptHandler && this.pendingScript) {
- const script = this.pendingScript;
-
- this.pendingScript = null;
-
- scriptHandler(script);
-
- return;
- }
-
- if (writeCallback) {
- writeCallback();
- }
- }
-
- //Text parsing
- _setupTokenizerCDATAMode() {
- const current = this._getAdjustedCurrentElement();
-
- this.tokenizer.allowCDATA =
- current &&
- current !== this.document &&
- this.treeAdapter.getNamespaceURI(current) !== NS.HTML &&
- !this._isIntegrationPoint(current);
- }
-
- _switchToTextParsing(currentToken, nextTokenizerState) {
- this._insertElement(currentToken, NS.HTML);
- this.tokenizer.state = nextTokenizerState;
- this.originalInsertionMode = this.insertionMode;
- this.insertionMode = TEXT_MODE;
- }
-
- switchToPlaintextParsing() {
- this.insertionMode = TEXT_MODE;
- this.originalInsertionMode = IN_BODY_MODE;
- this.tokenizer.state = Tokenizer.MODE.PLAINTEXT;
- }
-
- //Fragment parsing
- _getAdjustedCurrentElement() {
- return this.openElements.stackTop === 0 && this.fragmentContext
- ? this.fragmentContext
- : this.openElements.current;
- }
-
- _findFormInFragmentContext() {
- let node = this.fragmentContext;
-
- do {
- if (this.treeAdapter.getTagName(node) === $.FORM) {
- this.formElement = node;
- break;
- }
-
- node = this.treeAdapter.getParentNode(node);
- } while (node);
- }
-
- _initTokenizerForFragmentParsing() {
- if (this.treeAdapter.getNamespaceURI(this.fragmentContext) === NS.HTML) {
- const tn = this.treeAdapter.getTagName(this.fragmentContext);
-
- if (tn === $.TITLE || tn === $.TEXTAREA) {
- this.tokenizer.state = Tokenizer.MODE.RCDATA;
- } else if (
- tn === $.STYLE ||
- tn === $.XMP ||
- tn === $.IFRAME ||
- tn === $.NOEMBED ||
- tn === $.NOFRAMES ||
- tn === $.NOSCRIPT
- ) {
- this.tokenizer.state = Tokenizer.MODE.RAWTEXT;
- } else if (tn === $.SCRIPT) {
- this.tokenizer.state = Tokenizer.MODE.SCRIPT_DATA;
- } else if (tn === $.PLAINTEXT) {
- this.tokenizer.state = Tokenizer.MODE.PLAINTEXT;
- }
- }
- }
-
- //Tree mutation
- _setDocumentType(token) {
- const name = token.name || '';
- const publicId = token.publicId || '';
- const systemId = token.systemId || '';
-
- this.treeAdapter.setDocumentType(this.document, name, publicId, systemId);
- }
-
- _attachElementToTree(element) {
- if (this._shouldFosterParentOnInsertion()) {
- this._fosterParentElement(element);
- } else {
- const parent = this.openElements.currentTmplContent || this.openElements.current;
-
- this.treeAdapter.appendChild(parent, element);
- }
- }
-
- _appendElement(token, namespaceURI) {
- const element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
-
- this._attachElementToTree(element);
- }
-
- _insertElement(token, namespaceURI) {
- const element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
-
- this._attachElementToTree(element);
- this.openElements.push(element);
- }
-
- _insertFakeElement(tagName) {
- const element = this.treeAdapter.createElement(tagName, NS.HTML, []);
-
- this._attachElementToTree(element);
- this.openElements.push(element);
- }
-
- _insertTemplate(token) {
- const tmpl = this.treeAdapter.createElement(token.tagName, NS.HTML, token.attrs);
- const content = this.treeAdapter.createDocumentFragment();
-
- this.treeAdapter.setTemplateContent(tmpl, content);
- this._attachElementToTree(tmpl);
- this.openElements.push(tmpl);
- }
-
- _insertFakeRootElement() {
- const element = this.treeAdapter.createElement($.HTML, NS.HTML, []);
-
- this.treeAdapter.appendChild(this.openElements.current, element);
- this.openElements.push(element);
- }
-
- _appendCommentNode(token, parent) {
- const commentNode = this.treeAdapter.createCommentNode(token.data);
-
- this.treeAdapter.appendChild(parent, commentNode);
- }
-
- _insertCharacters(token) {
- if (this._shouldFosterParentOnInsertion()) {
- this._fosterParentText(token.chars);
- } else {
- const parent = this.openElements.currentTmplContent || this.openElements.current;
-
- this.treeAdapter.insertText(parent, token.chars);
- }
- }
-
- _adoptNodes(donor, recipient) {
- for (let child = this.treeAdapter.getFirstChild(donor); child; child = this.treeAdapter.getFirstChild(donor)) {
- this.treeAdapter.detachNode(child);
- this.treeAdapter.appendChild(recipient, child);
- }
- }
-
- //Token processing
- _shouldProcessTokenInForeignContent(token) {
- const current = this._getAdjustedCurrentElement();
-
- if (!current || current === this.document) {
- return false;
- }
-
- const ns = this.treeAdapter.getNamespaceURI(current);
-
- if (ns === NS.HTML) {
- return false;
- }
-
- if (
- this.treeAdapter.getTagName(current) === $.ANNOTATION_XML &&
- ns === NS.MATHML &&
- token.type === Tokenizer.START_TAG_TOKEN &&
- token.tagName === $.SVG
- ) {
- return false;
- }
-
- const isCharacterToken =
- token.type === Tokenizer.CHARACTER_TOKEN ||
- token.type === Tokenizer.NULL_CHARACTER_TOKEN ||
- token.type === Tokenizer.WHITESPACE_CHARACTER_TOKEN;
-
- const isMathMLTextStartTag =
- token.type === Tokenizer.START_TAG_TOKEN && token.tagName !== $.MGLYPH && token.tagName !== $.MALIGNMARK;
-
- if ((isMathMLTextStartTag || isCharacterToken) && this._isIntegrationPoint(current, NS.MATHML)) {
- return false;
- }
-
- if (
- (token.type === Tokenizer.START_TAG_TOKEN || isCharacterToken) &&
- this._isIntegrationPoint(current, NS.HTML)
- ) {
- return false;
- }
-
- return token.type !== Tokenizer.EOF_TOKEN;
- }
-
- _processToken(token) {
- TOKEN_HANDLERS[this.insertionMode][token.type](this, token);
- }
-
- _processTokenInBodyMode(token) {
- TOKEN_HANDLERS[IN_BODY_MODE][token.type](this, token);
- }
-
- _processTokenInForeignContent(token) {
- if (token.type === Tokenizer.CHARACTER_TOKEN) {
- characterInForeignContent(this, token);
- } else if (token.type === Tokenizer.NULL_CHARACTER_TOKEN) {
- nullCharacterInForeignContent(this, token);
- } else if (token.type === Tokenizer.WHITESPACE_CHARACTER_TOKEN) {
- insertCharacters(this, token);
- } else if (token.type === Tokenizer.COMMENT_TOKEN) {
- appendComment(this, token);
- } else if (token.type === Tokenizer.START_TAG_TOKEN) {
- startTagInForeignContent(this, token);
- } else if (token.type === Tokenizer.END_TAG_TOKEN) {
- endTagInForeignContent(this, token);
- }
- }
-
- _processInputToken(token) {
- if (this._shouldProcessTokenInForeignContent(token)) {
- this._processTokenInForeignContent(token);
- } else {
- this._processToken(token);
- }
-
- if (token.type === Tokenizer.START_TAG_TOKEN && token.selfClosing && !token.ackSelfClosing) {
- this._err(ERR.nonVoidHtmlElementStartTagWithTrailingSolidus);
- }
- }
-
- //Integration points
- _isIntegrationPoint(element, foreignNS) {
- const tn = this.treeAdapter.getTagName(element);
- const ns = this.treeAdapter.getNamespaceURI(element);
- const attrs = this.treeAdapter.getAttrList(element);
-
- return foreignContent.isIntegrationPoint(tn, ns, attrs, foreignNS);
- }
-
- //Active formatting elements reconstruction
- _reconstructActiveFormattingElements() {
- const listLength = this.activeFormattingElements.length;
-
- if (listLength) {
- let unopenIdx = listLength;
- let entry = null;
-
- do {
- unopenIdx--;
- entry = this.activeFormattingElements.entries[unopenIdx];
-
- if (entry.type === FormattingElementList.MARKER_ENTRY || this.openElements.contains(entry.element)) {
- unopenIdx++;
- break;
- }
- } while (unopenIdx > 0);
-
- for (let i = unopenIdx; i < listLength; i++) {
- entry = this.activeFormattingElements.entries[i];
- this._insertElement(entry.token, this.treeAdapter.getNamespaceURI(entry.element));
- entry.element = this.openElements.current;
- }
- }
- }
-
- //Close elements
- _closeTableCell() {
- this.openElements.generateImpliedEndTags();
- this.openElements.popUntilTableCellPopped();
- this.activeFormattingElements.clearToLastMarker();
- this.insertionMode = IN_ROW_MODE;
- }
-
- _closePElement() {
- this.openElements.generateImpliedEndTagsWithExclusion($.P);
- this.openElements.popUntilTagNamePopped($.P);
- }
-
- //Insertion modes
- _resetInsertionMode() {
- for (let i = this.openElements.stackTop, last = false; i >= 0; i--) {
- let element = this.openElements.items[i];
-
- if (i === 0) {
- last = true;
-
- if (this.fragmentContext) {
- element = this.fragmentContext;
- }
- }
-
- const tn = this.treeAdapter.getTagName(element);
- const newInsertionMode = INSERTION_MODE_RESET_MAP[tn];
-
- if (newInsertionMode) {
- this.insertionMode = newInsertionMode;
- break;
- } else if (!last && (tn === $.TD || tn === $.TH)) {
- this.insertionMode = IN_CELL_MODE;
- break;
- } else if (!last && tn === $.HEAD) {
- this.insertionMode = IN_HEAD_MODE;
- break;
- } else if (tn === $.SELECT) {
- this._resetInsertionModeForSelect(i);
- break;
- } else if (tn === $.TEMPLATE) {
- this.insertionMode = this.currentTmplInsertionMode;
- break;
- } else if (tn === $.HTML) {
- this.insertionMode = this.headElement ? AFTER_HEAD_MODE : BEFORE_HEAD_MODE;
- break;
- } else if (last) {
- this.insertionMode = IN_BODY_MODE;
- break;
- }
- }
- }
-
- _resetInsertionModeForSelect(selectIdx) {
- if (selectIdx > 0) {
- for (let i = selectIdx - 1; i > 0; i--) {
- const ancestor = this.openElements.items[i];
- const tn = this.treeAdapter.getTagName(ancestor);
-
- if (tn === $.TEMPLATE) {
- break;
- } else if (tn === $.TABLE) {
- this.insertionMode = IN_SELECT_IN_TABLE_MODE;
- return;
- }
- }
- }
-
- this.insertionMode = IN_SELECT_MODE;
- }
-
- _pushTmplInsertionMode(mode) {
- this.tmplInsertionModeStack.push(mode);
- this.tmplInsertionModeStackTop++;
- this.currentTmplInsertionMode = mode;
- }
-
- _popTmplInsertionMode() {
- this.tmplInsertionModeStack.pop();
- this.tmplInsertionModeStackTop--;
- this.currentTmplInsertionMode = this.tmplInsertionModeStack[this.tmplInsertionModeStackTop];
- }
-
- //Foster parenting
- _isElementCausesFosterParenting(element) {
- const tn = this.treeAdapter.getTagName(element);
-
- return tn === $.TABLE || tn === $.TBODY || tn === $.TFOOT || tn === $.THEAD || tn === $.TR;
- }
-
- _shouldFosterParentOnInsertion() {
- return this.fosterParentingEnabled && this._isElementCausesFosterParenting(this.openElements.current);
- }
-
- _findFosterParentingLocation() {
- const location = {
- parent: null,
- beforeElement: null
- };
-
- for (let i = this.openElements.stackTop; i >= 0; i--) {
- const openElement = this.openElements.items[i];
- const tn = this.treeAdapter.getTagName(openElement);
- const ns = this.treeAdapter.getNamespaceURI(openElement);
-
- if (tn === $.TEMPLATE && ns === NS.HTML) {
- location.parent = this.treeAdapter.getTemplateContent(openElement);
- break;
- } else if (tn === $.TABLE) {
- location.parent = this.treeAdapter.getParentNode(openElement);
-
- if (location.parent) {
- location.beforeElement = openElement;
- } else {
- location.parent = this.openElements.items[i - 1];
- }
-
- break;
- }
- }
-
- if (!location.parent) {
- location.parent = this.openElements.items[0];
- }
-
- return location;
- }
-
- _fosterParentElement(element) {
- const location = this._findFosterParentingLocation();
-
- if (location.beforeElement) {
- this.treeAdapter.insertBefore(location.parent, element, location.beforeElement);
- } else {
- this.treeAdapter.appendChild(location.parent, element);
- }
- }
-
- _fosterParentText(chars) {
- const location = this._findFosterParentingLocation();
-
- if (location.beforeElement) {
- this.treeAdapter.insertTextBefore(location.parent, chars, location.beforeElement);
- } else {
- this.treeAdapter.insertText(location.parent, chars);
- }
- }
-
- //Special elements
- _isSpecialElement(element) {
- const tn = this.treeAdapter.getTagName(element);
- const ns = this.treeAdapter.getNamespaceURI(element);
-
- return HTML.SPECIAL_ELEMENTS[ns][tn];
- }
-}
-
-/**
- * Check if the node is self closing.
- * @param {Object} parse parse5 object.
- * @param {Object} token Hml text token information.
- */
- function checkselfClosingNode(parse, token) {
- const tagName = (token.tagName || "").toLowerCase();
- const selfClosing = token.selfClosing;
- const flag = parse.validator.isSupportedSelfClosing(tagName);
- if (parse.nodeInfo.tn && tagName && !parse.nodeInfo.sc) {
- const loc =
- String(token.location.startLine) + String(token.location.startCol);
- if (
- !flag ||
- (loc !== parse.nodeInfo.pos && token.type === Tokenizer.START_TAG_TOKEN)
- ) {
- parse.compileResult.log.push({
- line: String(token.location.startLine) || 1,
- column: String(token.location.startCol) || 1,
- reason: 'ERROR: tag `' + parse.nodeInfo.tn + '` must be closed, please follow norm',
- });
- parse.nodeInfo = {};
- }
- }
- if (tagName && flag) {
- if (token.type === Tokenizer.START_TAG_TOKEN && !selfClosing) {
- parse.nodeInfo.tn = tagName;
- parse.nodeInfo.sc = false;
- parse.nodeInfo.pos =
- String(token.location.line) + String(token.location.col);
- }
- if (
- token.type === Tokenizer.END_TAG_TOKEN &&
- tagName === parse.nodeInfo.tn
- ) {
- parse.nodeInfo.sc = true;
- }
- }
- if (!flag && selfClosing && token.type === Tokenizer.START_TAG_TOKEN) {
- parse.compileResult.log.push({
- line: token.location.startLine || 1,
- column: token.location.startCol || 1,
- reason: "ERROR: tag `" + tagName + "` can not use selfClosing",
- });
- }
- }
-
- /**
- * Check if the html text is legal.
- * @param {Object} lastToken Hml text last token information.
- */
- function checkInvalid(lastToken) {
- if (
- lastToken.type && lastToken.type !== Tokenizer.END_TAG_TOKEN &&
- lastToken.type !== Tokenizer.COMMENT_TOKEN
- ) {
- compileResult.log.push({
- line: lastToken.location.startLine || 1,
- column: lastToken.location.startCol || 1,
- reason: "ERROR: hml content is invalid. Please check it.",
- });
- }
- }
-
-module.exports = Parser;
-
-//Adoption agency algorithm
-//(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adoptionAgency)
-//------------------------------------------------------------------
-
-//Steps 5-8 of the algorithm
-function aaObtainFormattingElementEntry(p, token) {
- let formattingElementEntry = p.activeFormattingElements.getElementEntryInScopeWithTagName(token.tagName);
-
- if (formattingElementEntry) {
- if (!p.openElements.contains(formattingElementEntry.element)) {
- p.activeFormattingElements.removeEntry(formattingElementEntry);
- formattingElementEntry = null;
- } else if (!p.openElements.hasInScope(token.tagName)) {
- formattingElementEntry = null;
- }
- } else {
- genericEndTagInBody(p, token);
- }
-
- return formattingElementEntry;
-}
-
-//Steps 9 and 10 of the algorithm
-function aaObtainFurthestBlock(p, formattingElementEntry) {
- let furthestBlock = null;
-
- for (let i = p.openElements.stackTop; i >= 0; i--) {
- const element = p.openElements.items[i];
-
- if (element === formattingElementEntry.element) {
- break;
- }
-
- if (p._isSpecialElement(element)) {
- furthestBlock = element;
- }
- }
-
- if (!furthestBlock) {
- p.openElements.popUntilElementPopped(formattingElementEntry.element);
- p.activeFormattingElements.removeEntry(formattingElementEntry);
- }
-
- return furthestBlock;
-}
-
-//Step 13 of the algorithm
-function aaInnerLoop(p, furthestBlock, formattingElement) {
- let lastElement = furthestBlock;
- let nextElement = p.openElements.getCommonAncestor(furthestBlock);
-
- for (let i = 0, element = nextElement; element !== formattingElement; i++, element = nextElement) {
- //NOTE: store next element for the next loop iteration (it may be deleted from the stack by step 9.5)
- nextElement = p.openElements.getCommonAncestor(element);
-
- const elementEntry = p.activeFormattingElements.getElementEntry(element);
- const counterOverflow = elementEntry && i >= AA_INNER_LOOP_ITER;
- const shouldRemoveFromOpenElements = !elementEntry || counterOverflow;
-
- if (shouldRemoveFromOpenElements) {
- if (counterOverflow) {
- p.activeFormattingElements.removeEntry(elementEntry);
- }
-
- p.openElements.remove(element);
- } else {
- element = aaRecreateElementFromEntry(p, elementEntry);
-
- if (lastElement === furthestBlock) {
- p.activeFormattingElements.bookmark = elementEntry;
- }
-
- p.treeAdapter.detachNode(lastElement);
- p.treeAdapter.appendChild(element, lastElement);
- lastElement = element;
- }
- }
-
- return lastElement;
-}
-
-//Step 13.7 of the algorithm
-function aaRecreateElementFromEntry(p, elementEntry) {
- const ns = p.treeAdapter.getNamespaceURI(elementEntry.element);
- const newElement = p.treeAdapter.createElement(elementEntry.token.tagName, ns, elementEntry.token.attrs);
-
- p.openElements.replace(elementEntry.element, newElement);
- elementEntry.element = newElement;
-
- return newElement;
-}
-
-//Step 14 of the algorithm
-function aaInsertLastNodeInCommonAncestor(p, commonAncestor, lastElement) {
- if (p._isElementCausesFosterParenting(commonAncestor)) {
- p._fosterParentElement(lastElement);
- } else {
- const tn = p.treeAdapter.getTagName(commonAncestor);
- const ns = p.treeAdapter.getNamespaceURI(commonAncestor);
-
- if (tn === $.TEMPLATE && ns === NS.HTML) {
- commonAncestor = p.treeAdapter.getTemplateContent(commonAncestor);
- }
-
- p.treeAdapter.appendChild(commonAncestor, lastElement);
- }
-}
-
-//Steps 15-19 of the algorithm
-function aaReplaceFormattingElement(p, furthestBlock, formattingElementEntry) {
- const ns = p.treeAdapter.getNamespaceURI(formattingElementEntry.element);
- const token = formattingElementEntry.token;
- const newElement = p.treeAdapter.createElement(token.tagName, ns, token.attrs);
-
- p._adoptNodes(furthestBlock, newElement);
- p.treeAdapter.appendChild(furthestBlock, newElement);
-
- p.activeFormattingElements.insertElementAfterBookmark(newElement, formattingElementEntry.token);
- p.activeFormattingElements.removeEntry(formattingElementEntry);
-
- p.openElements.remove(formattingElementEntry.element);
- p.openElements.insertAfter(furthestBlock, newElement);
-}
-
-//Algorithm entry point
-function callAdoptionAgency(p, token) {
- let formattingElementEntry;
-
- for (let i = 0; i < AA_OUTER_LOOP_ITER; i++) {
- formattingElementEntry = aaObtainFormattingElementEntry(p, token, formattingElementEntry);
-
- if (!formattingElementEntry) {
- break;
- }
-
- const furthestBlock = aaObtainFurthestBlock(p, formattingElementEntry);
-
- if (!furthestBlock) {
- break;
- }
-
- p.activeFormattingElements.bookmark = formattingElementEntry;
-
- const lastElement = aaInnerLoop(p, furthestBlock, formattingElementEntry.element);
- const commonAncestor = p.openElements.getCommonAncestor(formattingElementEntry.element);
-
- p.treeAdapter.detachNode(lastElement);
- aaInsertLastNodeInCommonAncestor(p, commonAncestor, lastElement);
- aaReplaceFormattingElement(p, furthestBlock, formattingElementEntry);
- }
-}
-
-//Generic token handlers
-//------------------------------------------------------------------
-function ignoreToken() {
- //NOTE: do nothing =)
-}
-
-function misplacedDoctype(p) {
- p._err(ERR.misplacedDoctype);
-}
-
-function appendComment(p, token) {
- p._appendCommentNode(token, p.openElements.currentTmplContent || p.openElements.current);
-}
-
-function appendCommentToRootHtmlElement(p, token) {
- p._appendCommentNode(token, p.openElements.items[0]);
-}
-
-function appendCommentToDocument(p, token) {
- p._appendCommentNode(token, p.document);
-}
-
-function insertCharacters(p, token) {
- p._insertCharacters(token);
-}
-
-function stopParsing(p) {
- p.stopped = true;
-}
-
-// The "initial" insertion mode
-//------------------------------------------------------------------
-function doctypeInInitialMode(p, token) {
- p._setDocumentType(token);
-
- const mode = token.forceQuirks ? HTML.DOCUMENT_MODE.QUIRKS : doctype.getDocumentMode(token);
-
- if (!doctype.isConforming(token)) {
- p._err(ERR.nonConformingDoctype);
- }
-
- p.treeAdapter.setDocumentMode(p.document, mode);
-
- p.insertionMode = BEFORE_HTML_MODE;
-}
-
-function tokenInInitialMode(p, token) {
- p._err(ERR.missingDoctype, { beforeToken: true });
- p.treeAdapter.setDocumentMode(p.document, HTML.DOCUMENT_MODE.QUIRKS);
- p.insertionMode = BEFORE_HTML_MODE;
- p._processToken(token);
-}
-
-// The "before html" insertion mode
-//------------------------------------------------------------------
-function startTagBeforeHtml(p, token) {
- if (token.tagName === $.HTML) {
- p._insertElement(token, NS.HTML);
- p.insertionMode = BEFORE_HEAD_MODE;
- } else {
- tokenBeforeHtml(p, token);
- }
-}
-
-function endTagBeforeHtml(p, token) {
- const tn = token.tagName;
-
- if (tn === $.HTML || tn === $.HEAD || tn === $.BODY || tn === $.BR) {
- tokenBeforeHtml(p, token);
- }
-}
-
-function tokenBeforeHtml(p, token) {
- p._insertFakeRootElement();
- p.insertionMode = BEFORE_HEAD_MODE;
- p._processToken(token);
-}
-
-// The "before head" insertion mode
-//------------------------------------------------------------------
-function startTagBeforeHead(p, token) {
- const tn = token.tagName;
-
- if (tn === $.HTML) {
- startTagInBody(p, token);
- } else if (tn === $.HEAD) {
- p._insertElement(token, NS.HTML);
- p.headElement = p.openElements.current;
- p.insertionMode = IN_HEAD_MODE;
- } else {
- tokenBeforeHead(p, token);
- }
-}
-
-function endTagBeforeHead(p, token) {
- const tn = token.tagName;
-
- if (tn === $.HEAD || tn === $.BODY || tn === $.HTML || tn === $.BR) {
- tokenBeforeHead(p, token);
- } else {
- p._err(ERR.endTagWithoutMatchingOpenElement);
- }
-}
-
-function tokenBeforeHead(p, token) {
- p._insertFakeElement($.HEAD);
- p.headElement = p.openElements.current;
- p.insertionMode = IN_HEAD_MODE;
- p._processToken(token);
-}
-
-// The "in head" insertion mode
-//------------------------------------------------------------------
-function startTagInHead(p, token) {
- const tn = token.tagName;
-
- if (tn === $.HTML) {
- startTagInBody(p, token);
- } else if (tn === $.BASE || tn === $.BASEFONT || tn === $.BGSOUND || tn === $.LINK || tn === $.META) {
- p._appendElement(token, NS.HTML);
- token.ackSelfClosing = true;
- } else if (tn === $.TITLE) {
- p._switchToTextParsing(token, Tokenizer.MODE.RCDATA);
- } else if (tn === $.NOSCRIPT) {
- if (p.options.scriptingEnabled) {
- p._switchToTextParsing(token, Tokenizer.MODE.RAWTEXT);
- } else {
- p._insertElement(token, NS.HTML);
- p.insertionMode = IN_HEAD_NO_SCRIPT_MODE;
- }
- } else if (tn === $.NOFRAMES || tn === $.STYLE) {
- p._switchToTextParsing(token, Tokenizer.MODE.RAWTEXT);
- } else if (tn === $.SCRIPT) {
- p._switchToTextParsing(token, Tokenizer.MODE.SCRIPT_DATA);
- } else if (tn === $.TEMPLATE) {
- p._insertTemplate(token, NS.HTML);
- p.activeFormattingElements.insertMarker();
- p.framesetOk = false;
- p.insertionMode = IN_TEMPLATE_MODE;
- p._pushTmplInsertionMode(IN_TEMPLATE_MODE);
- } else if (tn === $.HEAD) {
- p._err(ERR.misplacedStartTagForHeadElement);
- } else {
- tokenInHead(p, token);
- }
-}
-
-function endTagInHead(p, token) {
- const tn = token.tagName;
-
- if (tn === $.HEAD) {
- p.openElements.pop();
- p.insertionMode = AFTER_HEAD_MODE;
- } else if (tn === $.BODY || tn === $.BR || tn === $.HTML) {
- tokenInHead(p, token);
- } else if (tn === $.TEMPLATE) {
- if (p.openElements.tmplCount > 0) {
- p.openElements.generateImpliedEndTagsThoroughly();
-
- if (p.openElements.currentTagName !== $.TEMPLATE) {
- p._err(ERR.closingOfElementWithOpenChildElements);
- }
-
- p.openElements.popUntilTagNamePopped($.TEMPLATE);
- p.activeFormattingElements.clearToLastMarker();
- p._popTmplInsertionMode();
- p._resetInsertionMode();
- } else {
- p._err(ERR.endTagWithoutMatchingOpenElement);
- }
- } else {
- p._err(ERR.endTagWithoutMatchingOpenElement);
- }
-}
-
-function tokenInHead(p, token) {
- p.openElements.pop();
- p.insertionMode = AFTER_HEAD_MODE;
- p._processToken(token);
-}
-
-// The "in head no script" insertion mode
-//------------------------------------------------------------------
-function startTagInHeadNoScript(p, token) {
- const tn = token.tagName;
-
- if (tn === $.HTML) {
- startTagInBody(p, token);
- } else if (
- tn === $.BASEFONT ||
- tn === $.BGSOUND ||
- tn === $.HEAD ||
- tn === $.LINK ||
- tn === $.META ||
- tn === $.NOFRAMES ||
- tn === $.STYLE
- ) {
- startTagInHead(p, token);
- } else if (tn === $.NOSCRIPT) {
- p._err(ERR.nestedNoscriptInHead);
- } else {
- tokenInHeadNoScript(p, token);
- }
-}
-
-function endTagInHeadNoScript(p, token) {
- const tn = token.tagName;
-
- if (tn === $.NOSCRIPT) {
- p.openElements.pop();
- p.insertionMode = IN_HEAD_MODE;
- } else if (tn === $.BR) {
- tokenInHeadNoScript(p, token);
- } else {
- p._err(ERR.endTagWithoutMatchingOpenElement);
- }
-}
-
-function tokenInHeadNoScript(p, token) {
- const errCode =
- token.type === Tokenizer.EOF_TOKEN ? ERR.openElementsLeftAfterEof : ERR.disallowedContentInNoscriptInHead;
-
- p._err(errCode);
- p.openElements.pop();
- p.insertionMode = IN_HEAD_MODE;
- p._processToken(token);
-}
-
-// The "after head" insertion mode
-//------------------------------------------------------------------
-function startTagAfterHead(p, token) {
- const tn = token.tagName;
-
- if (tn === $.HTML) {
- startTagInBody(p, token);
- } else if (tn === $.BODY) {
- p._insertElement(token, NS.HTML);
- p.framesetOk = false;
- p.insertionMode = IN_BODY_MODE;
- } else if (tn === $.FRAMESET) {
- p._insertElement(token, NS.HTML);
- p.insertionMode = IN_FRAMESET_MODE;
- } else if (
- tn === $.BASE ||
- tn === $.BASEFONT ||
- tn === $.BGSOUND ||
- tn === $.LINK ||
- tn === $.META ||
- tn === $.NOFRAMES ||
- tn === $.SCRIPT ||
- tn === $.STYLE ||
- tn === $.TEMPLATE ||
- tn === $.TITLE
- ) {
- p._err(ERR.abandonedHeadElementChild);
- p.openElements.push(p.headElement);
- startTagInHead(p, token);
- p.openElements.remove(p.headElement);
- } else if (tn === $.HEAD) {
- p._err(ERR.misplacedStartTagForHeadElement);
- } else {
- tokenAfterHead(p, token);
- }
-}
-
-function endTagAfterHead(p, token) {
- const tn = token.tagName;
-
- if (tn === $.BODY || tn === $.HTML || tn === $.BR) {
- tokenAfterHead(p, token);
- } else if (tn === $.TEMPLATE) {
- endTagInHead(p, token);
- } else {
- p._err(ERR.endTagWithoutMatchingOpenElement);
- }
-}
-
-function tokenAfterHead(p, token) {
- p._insertFakeElement($.BODY);
- p.insertionMode = IN_BODY_MODE;
- p._processToken(token);
-}
-
-// The "in body" insertion mode
-//------------------------------------------------------------------
-function whitespaceCharacterInBody(p, token) {
- p._reconstructActiveFormattingElements();
- p._insertCharacters(token);
-}
-
-function characterInBody(p, token) {
- p._reconstructActiveFormattingElements();
- p._insertCharacters(token);
- p.framesetOk = false;
-}
-
-function htmlStartTagInBody(p, token) {
- if (p.openElements.tmplCount === 0) {
- p.treeAdapter.adoptAttributes(p.openElements.items[0], token.attrs);
- }
-}
-
-function bodyStartTagInBody(p, token) {
- const bodyElement = p.openElements.tryPeekProperlyNestedBodyElement();
-
- if (bodyElement && p.openElements.tmplCount === 0) {
- p.framesetOk = false;
- p.treeAdapter.adoptAttributes(bodyElement, token.attrs);
- }
-}
-
-function framesetStartTagInBody(p, token) {
- const bodyElement = p.openElements.tryPeekProperlyNestedBodyElement();
-
- if (p.framesetOk && bodyElement) {
- p.treeAdapter.detachNode(bodyElement);
- p.openElements.popAllUpToHtmlElement();
- p._insertElement(token, NS.HTML);
- p.insertionMode = IN_FRAMESET_MODE;
- }
-}
-
-function addressStartTagInBody(p, token) {
- if (p.openElements.hasInButtonScope($.P)) {
- p._closePElement();
- }
-
- p._insertElement(token, NS.HTML);
-}
-
-function numberedHeaderStartTagInBody(p, token) {
- if (p.openElements.hasInButtonScope($.P)) {
- p._closePElement();
- }
-
- const tn = p.openElements.currentTagName;
-
- if (tn === $.H1 || tn === $.H2 || tn === $.H3 || tn === $.H4 || tn === $.H5 || tn === $.H6) {
- p.openElements.pop();
- }
-
- p._insertElement(token, NS.HTML);
-}
-
-function preStartTagInBody(p, token) {
- if (p.openElements.hasInButtonScope($.P)) {
- p._closePElement();
- }
-
- p._insertElement(token, NS.HTML);
- //NOTE: If the next token is a U+000A LINE FEED (LF) character token, then ignore that token and move
- //on to the next one. (Newlines at the start of pre blocks are ignored as an authoring convenience.)
- p.skipNextNewLine = true;
- p.framesetOk = false;
-}
-
-function formStartTagInBody(p, token) {
- const inTemplate = p.openElements.tmplCount > 0;
-
- if (!p.formElement || inTemplate) {
- if (p.openElements.hasInButtonScope($.P)) {
- p._closePElement();
- }
-
- p._insertElement(token, NS.HTML);
-
- if (!inTemplate) {
- p.formElement = p.openElements.current;
- }
- }
-}
-
-function listItemStartTagInBody(p, token) {
- p.framesetOk = false;
-
- const tn = token.tagName;
-
- for (let i = p.openElements.stackTop; i >= 0; i--) {
- const element = p.openElements.items[i];
- const elementTn = p.treeAdapter.getTagName(element);
- let closeTn = null;
-
- if (tn === $.LI && elementTn === $.LI) {
- closeTn = $.LI;
- } else if ((tn === $.DD || tn === $.DT) && (elementTn === $.DD || elementTn === $.DT)) {
- closeTn = elementTn;
- }
-
- if (closeTn) {
- p.openElements.generateImpliedEndTagsWithExclusion(closeTn);
- p.openElements.popUntilTagNamePopped(closeTn);
- break;
- }
-
- if (elementTn !== $.ADDRESS && elementTn !== $.DIV && elementTn !== $.P && p._isSpecialElement(element)) {
- break;
- }
- }
-
- if (p.openElements.hasInButtonScope($.P)) {
- p._closePElement();
- }
-
- p._insertElement(token, NS.HTML);
-}
-
-function plaintextStartTagInBody(p, token) {
- if (p.openElements.hasInButtonScope($.P)) {
- p._closePElement();
- }
-
- p._insertElement(token, NS.HTML);
- p.tokenizer.state = Tokenizer.MODE.PLAINTEXT;
-}
-
-function buttonStartTagInBody(p, token) {
- if (p.openElements.hasInScope($.BUTTON)) {
- p.openElements.generateImpliedEndTags();
- p.openElements.popUntilTagNamePopped($.BUTTON);
- }
-
- p._reconstructActiveFormattingElements();
- p._insertElement(token, NS.HTML);
- p.framesetOk = false;
-}
-
-function aStartTagInBody(p, token) {
- const activeElementEntry = p.activeFormattingElements.getElementEntryInScopeWithTagName($.A);
-
- if (activeElementEntry) {
- callAdoptionAgency(p, token);
- p.openElements.remove(activeElementEntry.element);
- p.activeFormattingElements.removeEntry(activeElementEntry);
- }
-
- p._reconstructActiveFormattingElements();
- p._insertElement(token, NS.HTML);
- p.activeFormattingElements.pushElement(p.openElements.current, token);
-}
-
-function bStartTagInBody(p, token) {
- p._reconstructActiveFormattingElements();
- p._insertElement(token, NS.HTML);
- p.activeFormattingElements.pushElement(p.openElements.current, token);
-}
-
-function nobrStartTagInBody(p, token) {
- p._reconstructActiveFormattingElements();
-
- if (p.openElements.hasInScope($.NOBR)) {
- callAdoptionAgency(p, token);
- p._reconstructActiveFormattingElements();
- }
-
- p._insertElement(token, NS.HTML);
- p.activeFormattingElements.pushElement(p.openElements.current, token);
-}
-
-function appletStartTagInBody(p, token) {
- p._reconstructActiveFormattingElements();
- p._insertElement(token, NS.HTML);
- p.activeFormattingElements.insertMarker();
- p.framesetOk = false;
-}
-
-function tableStartTagInBody(p, token) {
- if (
- p.treeAdapter.getDocumentMode(p.document) !== HTML.DOCUMENT_MODE.QUIRKS &&
- p.openElements.hasInButtonScope($.P)
- ) {
- p._closePElement();
- }
-
- p._insertElement(token, NS.HTML);
- p.framesetOk = false;
- p.insertionMode = IN_TABLE_MODE;
-}
-
-function areaStartTagInBody(p, token) {
- p._reconstructActiveFormattingElements();
- p._appendElement(token, NS.HTML);
- p.framesetOk = false;
- token.ackSelfClosing = true;
-}
-
-function inputStartTagInBody(p, token) {
- p._reconstructActiveFormattingElements();
- p._appendElement(token, NS.HTML);
-
- const inputType = Tokenizer.getTokenAttr(token, ATTRS.TYPE);
-
- if (!inputType || inputType.toLowerCase() !== HIDDEN_INPUT_TYPE) {
- p.framesetOk = false;
- }
-
- token.ackSelfClosing = true;
-}
-
-function paramStartTagInBody(p, token) {
- p._appendElement(token, NS.HTML);
- token.ackSelfClosing = true;
-}
-
-function hrStartTagInBody(p, token) {
- if (p.openElements.hasInButtonScope($.P)) {
- p._closePElement();
- }
-
- p._appendElement(token, NS.HTML);
- p.framesetOk = false;
- p.ackSelfClosing = true;
-}
-
-function imageStartTagInBody(p, token) {
- token.tagName = $.IMG;
- areaStartTagInBody(p, token);
-}
-
-function textareaStartTagInBody(p, token) {
- p._insertElement(token, NS.HTML);
- //NOTE: If the next token is a U+000A LINE FEED (LF) character token, then ignore that token and move
- //on to the next one. (Newlines at the start of textarea elements are ignored as an authoring convenience.)
- p.skipNextNewLine = true;
- p.tokenizer.state = Tokenizer.MODE.RCDATA;
- p.originalInsertionMode = p.insertionMode;
- p.framesetOk = false;
- p.insertionMode = TEXT_MODE;
-}
-
-function xmpStartTagInBody(p, token) {
- if (p.openElements.hasInButtonScope($.P)) {
- p._closePElement();
- }
-
- p._reconstructActiveFormattingElements();
- p.framesetOk = false;
- p._switchToTextParsing(token, Tokenizer.MODE.RAWTEXT);
-}
-
-function iframeStartTagInBody(p, token) {
- p.framesetOk = false;
- p._switchToTextParsing(token, Tokenizer.MODE.RAWTEXT);
-}
-
-//NOTE: here we assume that we always act as an user agent with enabled plugins, so we parse
-// as a rawtext.
-function noembedStartTagInBody(p, token) {
- p._switchToTextParsing(token, Tokenizer.MODE.RAWTEXT);
-}
-
-function selectStartTagInBody(p, token) {
- p._reconstructActiveFormattingElements();
- p._insertElement(token, NS.HTML);
- p.framesetOk = false;
-
- if (
- p.insertionMode === IN_TABLE_MODE ||
- p.insertionMode === IN_CAPTION_MODE ||
- p.insertionMode === IN_TABLE_BODY_MODE ||
- p.insertionMode === IN_ROW_MODE ||
- p.insertionMode === IN_CELL_MODE
- ) {
- p.insertionMode = IN_SELECT_IN_TABLE_MODE;
- } else {
- p.insertionMode = IN_SELECT_MODE;
- }
-}
-
-function optgroupStartTagInBody(p, token) {
- if (p.openElements.currentTagName === $.OPTION) {
- p.openElements.pop();
- }
-
- p._reconstructActiveFormattingElements();
- p._insertElement(token, NS.HTML);
-}
-
-function rbStartTagInBody(p, token) {
- if (p.openElements.hasInScope($.RUBY)) {
- p.openElements.generateImpliedEndTags();
- }
-
- p._insertElement(token, NS.HTML);
-}
-
-function rtStartTagInBody(p, token) {
- if (p.openElements.hasInScope($.RUBY)) {
- p.openElements.generateImpliedEndTagsWithExclusion($.RTC);
- }
-
- p._insertElement(token, NS.HTML);
-}
-
-function menuStartTagInBody(p, token) {
- if (p.openElements.hasInButtonScope($.P)) {
- p._closePElement();
- }
-
- p._insertElement(token, NS.HTML);
-}
-
-function mathStartTagInBody(p, token) {
- p._reconstructActiveFormattingElements();
-
- foreignContent.adjustTokenMathMLAttrs(token);
- foreignContent.adjustTokenXMLAttrs(token);
-
- if (token.selfClosing) {
- p._appendElement(token, NS.MATHML);
- } else {
- p._insertElement(token, NS.MATHML);
- }
-
- token.ackSelfClosing = true;
-}
-
-function svgStartTagInBody(p, token) {
- p._reconstructActiveFormattingElements();
-
- foreignContent.adjustTokenSVGAttrs(token);
- foreignContent.adjustTokenXMLAttrs(token);
-
- if (token.selfClosing) {
- p._appendElement(token, NS.SVG);
- } else {
- p._insertElement(token, NS.SVG);
- }
-
- token.ackSelfClosing = true;
-}
-
-function genericStartTagInBody(p, token) {
- p._reconstructActiveFormattingElements();
- p._insertElement(token, NS.HTML);
-}
-
-//OPTIMIZATION: Integer comparisons are low-cost, so we can use very fast tag name length filters here.
-//It's faster than using dictionary.
-function startTagInBody(p, token) {
- const tn = token.tagName;
-
- switch (tn.length) {
- case 1:
- if (tn === $.I || tn === $.S || tn === $.B || tn === $.U) {
- bStartTagInBody(p, token);
- } else if (tn === $.P) {
- addressStartTagInBody(p, token);
- } else if (tn === $.A) {
- aStartTagInBody(p, token);
- } else {
- genericStartTagInBody(p, token);
- }
-
- break;
-
- case 2:
- if (tn === $.DL || tn === $.OL || tn === $.UL) {
- addressStartTagInBody(p, token);
- } else if (tn === $.H1 || tn === $.H2 || tn === $.H3 || tn === $.H4 || tn === $.H5 || tn === $.H6) {
- numberedHeaderStartTagInBody(p, token);
- } else if (tn === $.LI || tn === $.DD || tn === $.DT) {
- listItemStartTagInBody(p, token);
- } else if (tn === $.EM || tn === $.TT) {
- bStartTagInBody(p, token);
- } else if (tn === $.BR) {
- areaStartTagInBody(p, token);
- } else if (tn === $.HR) {
- hrStartTagInBody(p, token);
- } else if (tn === $.RB) {
- rbStartTagInBody(p, token);
- } else if (tn === $.RT || tn === $.RP) {
- rtStartTagInBody(p, token);
- } else if (tn !== $.TH && tn !== $.TD && tn !== $.TR) {
- genericStartTagInBody(p, token);
- }
-
- break;
-
- case 3:
- if (tn === $.DIV || tn === $.DIR || tn === $.NAV) {
- addressStartTagInBody(p, token);
- } else if (tn === $.PRE) {
- preStartTagInBody(p, token);
- } else if (tn === $.BIG) {
- bStartTagInBody(p, token);
- } else if (tn === $.IMG || tn === $.WBR) {
- areaStartTagInBody(p, token);
- } else if (tn === $.XMP) {
- xmpStartTagInBody(p, token);
- } else if (tn === $.SVG) {
- svgStartTagInBody(p, token);
- } else if (tn === $.RTC) {
- rbStartTagInBody(p, token);
- } else if (tn !== $.COL) {
- genericStartTagInBody(p, token);
- }
-
- break;
-
- case 4:
- if (tn === $.HTML) {
- htmlStartTagInBody(p, token);
- } else if (tn === $.BASE || tn === $.LINK || tn === $.META) {
- startTagInHead(p, token);
- } else if (tn === $.BODY) {
- bodyStartTagInBody(p, token);
- } else if (tn === $.MAIN || tn === $.MENU) {
- addressStartTagInBody(p, token);
- } else if (tn === $.FORM) {
- formStartTagInBody(p, token);
- } else if (tn === $.CODE || tn === $.FONT) {
- bStartTagInBody(p, token);
- } else if (tn === $.NOBR) {
- nobrStartTagInBody(p, token);
- } else if (tn === $.AREA) {
- areaStartTagInBody(p, token);
- } else if (tn === $.MATH) {
- mathStartTagInBody(p, token);
- } else if (tn === $.MENU) {
- menuStartTagInBody(p, token);
- } else if (tn !== $.HEAD) {
- genericStartTagInBody(p, token);
- }
-
- break;
-
- case 5:
- if (tn === $.STYLE || tn === $.TITLE) {
- startTagInHead(p, token);
- } else if (tn === $.ASIDE) {
- addressStartTagInBody(p, token);
- } else if (tn === $.SMALL) {
- bStartTagInBody(p, token);
- } else if (tn === $.TABLE) {
- tableStartTagInBody(p, token);
- } else if (tn === $.EMBED) {
- areaStartTagInBody(p, token);
- } else if (tn === $.INPUT) {
- inputStartTagInBody(p, token);
- } else if (tn === $.PARAM || tn === $.TRACK) {
- paramStartTagInBody(p, token);
- } else if (tn === $.IMAGE) {
- imageStartTagInBody(p, token);
- } else if (tn !== $.FRAME && tn !== $.TBODY && tn !== $.TFOOT && tn !== $.THEAD) {
- genericStartTagInBody(p, token);
- }
-
- break;
-
- case 6:
- if (tn === $.SCRIPT) {
- startTagInHead(p, token);
- } else if (
- tn === $.CENTER ||
- tn === $.FIGURE ||
- tn === $.FOOTER ||
- tn === $.HEADER ||
- tn === $.HGROUP ||
- tn === $.DIALOG
- ) {
- addressStartTagInBody(p, token);
- } else if (tn === $.BUTTON) {
- buttonStartTagInBody(p, token);
- } else if (tn === $.STRIKE || tn === $.STRONG) {
- bStartTagInBody(p, token);
- } else if (tn === $.APPLET || tn === $.OBJECT) {
- appletStartTagInBody(p, token);
- } else if (tn === $.KEYGEN) {
- areaStartTagInBody(p, token);
- } else if (tn === $.SOURCE) {
- paramStartTagInBody(p, token);
- } else if (tn === $.IFRAME) {
- iframeStartTagInBody(p, token);
- } else if (tn === $.SELECT) {
- selectStartTagInBody(p, token);
- } else if (tn === $.OPTION) {
- optgroupStartTagInBody(p, token);
- } else {
- genericStartTagInBody(p, token);
- }
-
- break;
-
- case 7:
- if (tn === $.BGSOUND) {
- startTagInHead(p, token);
- } else if (
- tn === $.DETAILS ||
- tn === $.ADDRESS ||
- tn === $.ARTICLE ||
- tn === $.SECTION ||
- tn === $.SUMMARY
- ) {
- addressStartTagInBody(p, token);
- } else if (tn === $.LISTING) {
- preStartTagInBody(p, token);
- } else if (tn === $.MARQUEE) {
- appletStartTagInBody(p, token);
- } else if (tn === $.NOEMBED) {
- noembedStartTagInBody(p, token);
- } else if (tn !== $.CAPTION) {
- genericStartTagInBody(p, token);
- }
-
- break;
-
- case 8:
- if (tn === $.BASEFONT) {
- startTagInHead(p, token);
- } else if (tn === $.FRAMESET) {
- framesetStartTagInBody(p, token);
- } else if (tn === $.FIELDSET) {
- addressStartTagInBody(p, token);
- } else if (tn === $.TEXTAREA) {
- textareaStartTagInBody(p, token);
- } else if (tn === $.TEMPLATE) {
- startTagInHead(p, token);
- } else if (tn === $.NOSCRIPT) {
- if (p.options.scriptingEnabled) {
- noembedStartTagInBody(p, token);
- } else {
- genericStartTagInBody(p, token);
- }
- } else if (tn === $.OPTGROUP) {
- optgroupStartTagInBody(p, token);
- } else if (tn !== $.COLGROUP) {
- genericStartTagInBody(p, token);
- }
-
- break;
-
- case 9:
- if (tn === $.PLAINTEXT) {
- plaintextStartTagInBody(p, token);
- } else {
- genericStartTagInBody(p, token);
- }
-
- break;
-
- case 10:
- if (tn === $.BLOCKQUOTE || tn === $.FIGCAPTION) {
- addressStartTagInBody(p, token);
- } else {
- genericStartTagInBody(p, token);
- }
-
- break;
-
- default:
- genericStartTagInBody(p, token);
- }
-}
-
-function bodyEndTagInBody(p) {
- if (p.openElements.hasInScope($.BODY)) {
- p.insertionMode = AFTER_BODY_MODE;
- }
-}
-
-function htmlEndTagInBody(p, token) {
- if (p.openElements.hasInScope($.BODY)) {
- p.insertionMode = AFTER_BODY_MODE;
- p._processToken(token);
- }
-}
-
-function addressEndTagInBody(p, token) {
- const tn = token.tagName;
-
- if (p.openElements.hasInScope(tn)) {
- p.openElements.generateImpliedEndTags();
- p.openElements.popUntilTagNamePopped(tn);
- }
-}
-
-function formEndTagInBody(p) {
- const inTemplate = p.openElements.tmplCount > 0;
- const formElement = p.formElement;
-
- if (!inTemplate) {
- p.formElement = null;
- }
-
- if ((formElement || inTemplate) && p.openElements.hasInScope($.FORM)) {
- p.openElements.generateImpliedEndTags();
-
- if (inTemplate) {
- p.openElements.popUntilTagNamePopped($.FORM);
- } else {
- p.openElements.remove(formElement);
- }
- }
-}
-
-function pEndTagInBody(p) {
- if (!p.openElements.hasInButtonScope($.P)) {
- p._insertFakeElement($.P);
- }
-
- p._closePElement();
-}
-
-function liEndTagInBody(p) {
- if (p.openElements.hasInListItemScope($.LI)) {
- p.openElements.generateImpliedEndTagsWithExclusion($.LI);
- p.openElements.popUntilTagNamePopped($.LI);
- }
-}
-
-function ddEndTagInBody(p, token) {
- const tn = token.tagName;
-
- if (p.openElements.hasInScope(tn)) {
- p.openElements.generateImpliedEndTagsWithExclusion(tn);
- p.openElements.popUntilTagNamePopped(tn);
- }
-}
-
-function numberedHeaderEndTagInBody(p) {
- if (p.openElements.hasNumberedHeaderInScope()) {
- p.openElements.generateImpliedEndTags();
- p.openElements.popUntilNumberedHeaderPopped();
- }
-}
-
-function appletEndTagInBody(p, token) {
- const tn = token.tagName;
-
- if (p.openElements.hasInScope(tn)) {
- p.openElements.generateImpliedEndTags();
- p.openElements.popUntilTagNamePopped(tn);
- p.activeFormattingElements.clearToLastMarker();
- }
-}
-
-function brEndTagInBody(p) {
- p._reconstructActiveFormattingElements();
- p._insertFakeElement($.BR);
- p.openElements.pop();
- p.framesetOk = false;
-}
-
-function genericEndTagInBody(p, token) {
- const tn = token.tagName;
-
- for (let i = p.openElements.stackTop; i > 0; i--) {
- const element = p.openElements.items[i];
-
- if (p.treeAdapter.getTagName(element) === tn) {
- p.openElements.generateImpliedEndTagsWithExclusion(tn);
- p.openElements.popUntilElementPopped(element);
- break;
- }
-
- if (p._isSpecialElement(element)) {
- break;
- }
- }
-}
-
-//OPTIMIZATION: Integer comparisons are low-cost, so we can use very fast tag name length filters here.
-//It's faster than using dictionary.
-function endTagInBody(p, token) {
- const tn = token.tagName;
-
- switch (tn.length) {
- case 1:
- if (tn === $.A || tn === $.B || tn === $.I || tn === $.S || tn === $.U) {
- callAdoptionAgency(p, token);
- } else if (tn === $.P) {
- pEndTagInBody(p, token);
- } else {
- genericEndTagInBody(p, token);
- }
-
- break;
-
- case 2:
- if (tn === $.DL || tn === $.UL || tn === $.OL) {
- addressEndTagInBody(p, token);
- } else if (tn === $.LI) {
- liEndTagInBody(p, token);
- } else if (tn === $.DD || tn === $.DT) {
- ddEndTagInBody(p, token);
- } else if (tn === $.H1 || tn === $.H2 || tn === $.H3 || tn === $.H4 || tn === $.H5 || tn === $.H6) {
- numberedHeaderEndTagInBody(p, token);
- } else if (tn === $.BR) {
- brEndTagInBody(p, token);
- } else if (tn === $.EM || tn === $.TT) {
- callAdoptionAgency(p, token);
- } else {
- genericEndTagInBody(p, token);
- }
-
- break;
-
- case 3:
- if (tn === $.BIG) {
- callAdoptionAgency(p, token);
- } else if (tn === $.DIR || tn === $.DIV || tn === $.NAV || tn === $.PRE) {
- addressEndTagInBody(p, token);
- } else {
- genericEndTagInBody(p, token);
- }
-
- break;
-
- case 4:
- if (tn === $.BODY) {
- bodyEndTagInBody(p, token);
- } else if (tn === $.HTML) {
- htmlEndTagInBody(p, token);
- } else if (tn === $.FORM) {
- formEndTagInBody(p, token);
- } else if (tn === $.CODE || tn === $.FONT || tn === $.NOBR) {
- callAdoptionAgency(p, token);
- } else if (tn === $.MAIN || tn === $.MENU) {
- addressEndTagInBody(p, token);
- } else {
- genericEndTagInBody(p, token);
- }
-
- break;
-
- case 5:
- if (tn === $.ASIDE) {
- addressEndTagInBody(p, token);
- } else if (tn === $.SMALL) {
- callAdoptionAgency(p, token);
- } else {
- genericEndTagInBody(p, token);
- }
-
- break;
-
- case 6:
- if (
- tn === $.CENTER ||
- tn === $.FIGURE ||
- tn === $.FOOTER ||
- tn === $.HEADER ||
- tn === $.HGROUP ||
- tn === $.DIALOG
- ) {
- addressEndTagInBody(p, token);
- } else if (tn === $.APPLET || tn === $.OBJECT) {
- appletEndTagInBody(p, token);
- } else if (tn === $.STRIKE || tn === $.STRONG) {
- callAdoptionAgency(p, token);
- } else {
- genericEndTagInBody(p, token);
- }
-
- break;
-
- case 7:
- if (
- tn === $.ADDRESS ||
- tn === $.ARTICLE ||
- tn === $.DETAILS ||
- tn === $.SECTION ||
- tn === $.SUMMARY ||
- tn === $.LISTING
- ) {
- addressEndTagInBody(p, token);
- } else if (tn === $.MARQUEE) {
- appletEndTagInBody(p, token);
- } else {
- genericEndTagInBody(p, token);
- }
-
- break;
-
- case 8:
- if (tn === $.FIELDSET) {
- addressEndTagInBody(p, token);
- } else if (tn === $.TEMPLATE) {
- endTagInHead(p, token);
- } else {
- genericEndTagInBody(p, token);
- }
-
- break;
-
- case 10:
- if (tn === $.BLOCKQUOTE || tn === $.FIGCAPTION) {
- addressEndTagInBody(p, token);
- } else {
- genericEndTagInBody(p, token);
- }
-
- break;
-
- default:
- genericEndTagInBody(p, token);
- }
-}
-
-function eofInBody(p, token) {
- if (p.tmplInsertionModeStackTop > -1) {
- eofInTemplate(p, token);
- } else {
- p.stopped = true;
- }
-}
-
-// The "text" insertion mode
-//------------------------------------------------------------------
-function endTagInText(p, token) {
- if (token.tagName === $.SCRIPT) {
- p.pendingScript = p.openElements.current;
- }
-
- p.openElements.pop();
- p.insertionMode = p.originalInsertionMode;
-}
-
-function eofInText(p, token) {
- p._err(ERR.eofInElementThatCanContainOnlyText);
- p.openElements.pop();
- p.insertionMode = p.originalInsertionMode;
- p._processToken(token);
-}
-
-// The "in table" insertion mode
-//------------------------------------------------------------------
-function characterInTable(p, token) {
- const curTn = p.openElements.currentTagName;
-
- if (curTn === $.TABLE || curTn === $.TBODY || curTn === $.TFOOT || curTn === $.THEAD || curTn === $.TR) {
- p.pendingCharacterTokens = [];
- p.hasNonWhitespacePendingCharacterToken = false;
- p.originalInsertionMode = p.insertionMode;
- p.insertionMode = IN_TABLE_TEXT_MODE;
- p._processToken(token);
- } else {
- tokenInTable(p, token);
- }
-}
-
-function captionStartTagInTable(p, token) {
- p.openElements.clearBackToTableContext();
- p.activeFormattingElements.insertMarker();
- p._insertElement(token, NS.HTML);
- p.insertionMode = IN_CAPTION_MODE;
-}
-
-function colgroupStartTagInTable(p, token) {
- p.openElements.clearBackToTableContext();
- p._insertElement(token, NS.HTML);
- p.insertionMode = IN_COLUMN_GROUP_MODE;
-}
-
-function colStartTagInTable(p, token) {
- p.openElements.clearBackToTableContext();
- p._insertFakeElement($.COLGROUP);
- p.insertionMode = IN_COLUMN_GROUP_MODE;
- p._processToken(token);
-}
-
-function tbodyStartTagInTable(p, token) {
- p.openElements.clearBackToTableContext();
- p._insertElement(token, NS.HTML);
- p.insertionMode = IN_TABLE_BODY_MODE;
-}
-
-function tdStartTagInTable(p, token) {
- p.openElements.clearBackToTableContext();
- p._insertFakeElement($.TBODY);
- p.insertionMode = IN_TABLE_BODY_MODE;
- p._processToken(token);
-}
-
-function tableStartTagInTable(p, token) {
- if (p.openElements.hasInTableScope($.TABLE)) {
- p.openElements.popUntilTagNamePopped($.TABLE);
- p._resetInsertionMode();
- p._processToken(token);
- }
-}
-
-function inputStartTagInTable(p, token) {
- const inputType = Tokenizer.getTokenAttr(token, ATTRS.TYPE);
-
- if (inputType && inputType.toLowerCase() === HIDDEN_INPUT_TYPE) {
- p._appendElement(token, NS.HTML);
- } else {
- tokenInTable(p, token);
- }
-
- token.ackSelfClosing = true;
-}
-
-function formStartTagInTable(p, token) {
- if (!p.formElement && p.openElements.tmplCount === 0) {
- p._insertElement(token, NS.HTML);
- p.formElement = p.openElements.current;
- p.openElements.pop();
- }
-}
-
-function startTagInTable(p, token) {
- const tn = token.tagName;
-
- switch (tn.length) {
- case 2:
- if (tn === $.TD || tn === $.TH || tn === $.TR) {
- tdStartTagInTable(p, token);
- } else {
- tokenInTable(p, token);
- }
-
- break;
-
- case 3:
- if (tn === $.COL) {
- colStartTagInTable(p, token);
- } else {
- tokenInTable(p, token);
- }
-
- break;
-
- case 4:
- if (tn === $.FORM) {
- formStartTagInTable(p, token);
- } else {
- tokenInTable(p, token);
- }
-
- break;
-
- case 5:
- if (tn === $.TABLE) {
- tableStartTagInTable(p, token);
- } else if (tn === $.STYLE) {
- startTagInHead(p, token);
- } else if (tn === $.TBODY || tn === $.TFOOT || tn === $.THEAD) {
- tbodyStartTagInTable(p, token);
- } else if (tn === $.INPUT) {
- inputStartTagInTable(p, token);
- } else {
- tokenInTable(p, token);
- }
-
- break;
-
- case 6:
- if (tn === $.SCRIPT) {
- startTagInHead(p, token);
- } else {
- tokenInTable(p, token);
- }
-
- break;
-
- case 7:
- if (tn === $.CAPTION) {
- captionStartTagInTable(p, token);
- } else {
- tokenInTable(p, token);
- }
-
- break;
-
- case 8:
- if (tn === $.COLGROUP) {
- colgroupStartTagInTable(p, token);
- } else if (tn === $.TEMPLATE) {
- startTagInHead(p, token);
- } else {
- tokenInTable(p, token);
- }
-
- break;
-
- default:
- tokenInTable(p, token);
- }
-}
-
-function endTagInTable(p, token) {
- const tn = token.tagName;
-
- if (tn === $.TABLE) {
- if (p.openElements.hasInTableScope($.TABLE)) {
- p.openElements.popUntilTagNamePopped($.TABLE);
- p._resetInsertionMode();
- }
- } else if (tn === $.TEMPLATE) {
- endTagInHead(p, token);
- } else if (
- tn !== $.BODY &&
- tn !== $.CAPTION &&
- tn !== $.COL &&
- tn !== $.COLGROUP &&
- tn !== $.HTML &&
- tn !== $.TBODY &&
- tn !== $.TD &&
- tn !== $.TFOOT &&
- tn !== $.TH &&
- tn !== $.THEAD &&
- tn !== $.TR
- ) {
- tokenInTable(p, token);
- }
-}
-
-function tokenInTable(p, token) {
- const savedFosterParentingState = p.fosterParentingEnabled;
-
- p.fosterParentingEnabled = true;
- p._processTokenInBodyMode(token);
- p.fosterParentingEnabled = savedFosterParentingState;
-}
-
-// The "in table text" insertion mode
-//------------------------------------------------------------------
-function whitespaceCharacterInTableText(p, token) {
- p.pendingCharacterTokens.push(token);
-}
-
-function characterInTableText(p, token) {
- p.pendingCharacterTokens.push(token);
- p.hasNonWhitespacePendingCharacterToken = true;
-}
-
-function tokenInTableText(p, token) {
- let i = 0;
-
- if (p.hasNonWhitespacePendingCharacterToken) {
- for (; i < p.pendingCharacterTokens.length; i++) {
- tokenInTable(p, p.pendingCharacterTokens[i]);
- }
- } else {
- for (; i < p.pendingCharacterTokens.length; i++) {
- p._insertCharacters(p.pendingCharacterTokens[i]);
- }
- }
-
- p.insertionMode = p.originalInsertionMode;
- p._processToken(token);
-}
-
-// The "in caption" insertion mode
-//------------------------------------------------------------------
-function startTagInCaption(p, token) {
- const tn = token.tagName;
-
- if (
- tn === $.CAPTION ||
- tn === $.COL ||
- tn === $.COLGROUP ||
- tn === $.TBODY ||
- tn === $.TD ||
- tn === $.TFOOT ||
- tn === $.TH ||
- tn === $.THEAD ||
- tn === $.TR
- ) {
- if (p.openElements.hasInTableScope($.CAPTION)) {
- p.openElements.generateImpliedEndTags();
- p.openElements.popUntilTagNamePopped($.CAPTION);
- p.activeFormattingElements.clearToLastMarker();
- p.insertionMode = IN_TABLE_MODE;
- p._processToken(token);
- }
- } else {
- startTagInBody(p, token);
- }
-}
-
-function endTagInCaption(p, token) {
- const tn = token.tagName;
-
- if (tn === $.CAPTION || tn === $.TABLE) {
- if (p.openElements.hasInTableScope($.CAPTION)) {
- p.openElements.generateImpliedEndTags();
- p.openElements.popUntilTagNamePopped($.CAPTION);
- p.activeFormattingElements.clearToLastMarker();
- p.insertionMode = IN_TABLE_MODE;
-
- if (tn === $.TABLE) {
- p._processToken(token);
- }
- }
- } else if (
- tn !== $.BODY &&
- tn !== $.COL &&
- tn !== $.COLGROUP &&
- tn !== $.HTML &&
- tn !== $.TBODY &&
- tn !== $.TD &&
- tn !== $.TFOOT &&
- tn !== $.TH &&
- tn !== $.THEAD &&
- tn !== $.TR
- ) {
- endTagInBody(p, token);
- }
-}
-
-// The "in column group" insertion mode
-//------------------------------------------------------------------
-function startTagInColumnGroup(p, token) {
- const tn = token.tagName;
-
- if (tn === $.HTML) {
- startTagInBody(p, token);
- } else if (tn === $.COL) {
- p._appendElement(token, NS.HTML);
- token.ackSelfClosing = true;
- } else if (tn === $.TEMPLATE) {
- startTagInHead(p, token);
- } else {
- tokenInColumnGroup(p, token);
- }
-}
-
-function endTagInColumnGroup(p, token) {
- const tn = token.tagName;
-
- if (tn === $.COLGROUP) {
- if (p.openElements.currentTagName === $.COLGROUP) {
- p.openElements.pop();
- p.insertionMode = IN_TABLE_MODE;
- }
- } else if (tn === $.TEMPLATE) {
- endTagInHead(p, token);
- } else if (tn !== $.COL) {
- tokenInColumnGroup(p, token);
- }
-}
-
-function tokenInColumnGroup(p, token) {
- if (p.openElements.currentTagName === $.COLGROUP) {
- p.openElements.pop();
- p.insertionMode = IN_TABLE_MODE;
- p._processToken(token);
- }
-}
-
-// The "in table body" insertion mode
-//------------------------------------------------------------------
-function startTagInTableBody(p, token) {
- const tn = token.tagName;
-
- if (tn === $.TR) {
- p.openElements.clearBackToTableBodyContext();
- p._insertElement(token, NS.HTML);
- p.insertionMode = IN_ROW_MODE;
- } else if (tn === $.TH || tn === $.TD) {
- p.openElements.clearBackToTableBodyContext();
- p._insertFakeElement($.TR);
- p.insertionMode = IN_ROW_MODE;
- p._processToken(token);
- } else if (
- tn === $.CAPTION ||
- tn === $.COL ||
- tn === $.COLGROUP ||
- tn === $.TBODY ||
- tn === $.TFOOT ||
- tn === $.THEAD
- ) {
- if (p.openElements.hasTableBodyContextInTableScope()) {
- p.openElements.clearBackToTableBodyContext();
- p.openElements.pop();
- p.insertionMode = IN_TABLE_MODE;
- p._processToken(token);
- }
- } else {
- startTagInTable(p, token);
- }
-}
-
-function endTagInTableBody(p, token) {
- const tn = token.tagName;
-
- if (tn === $.TBODY || tn === $.TFOOT || tn === $.THEAD) {
- if (p.openElements.hasInTableScope(tn)) {
- p.openElements.clearBackToTableBodyContext();
- p.openElements.pop();
- p.insertionMode = IN_TABLE_MODE;
- }
- } else if (tn === $.TABLE) {
- if (p.openElements.hasTableBodyContextInTableScope()) {
- p.openElements.clearBackToTableBodyContext();
- p.openElements.pop();
- p.insertionMode = IN_TABLE_MODE;
- p._processToken(token);
- }
- } else if (
- (tn !== $.BODY && tn !== $.CAPTION && tn !== $.COL && tn !== $.COLGROUP) ||
- (tn !== $.HTML && tn !== $.TD && tn !== $.TH && tn !== $.TR)
- ) {
- endTagInTable(p, token);
- }
-}
-
-// The "in row" insertion mode
-//------------------------------------------------------------------
-function startTagInRow(p, token) {
- const tn = token.tagName;
-
- if (tn === $.TH || tn === $.TD) {
- p.openElements.clearBackToTableRowContext();
- p._insertElement(token, NS.HTML);
- p.insertionMode = IN_CELL_MODE;
- p.activeFormattingElements.insertMarker();
- } else if (
- tn === $.CAPTION ||
- tn === $.COL ||
- tn === $.COLGROUP ||
- tn === $.TBODY ||
- tn === $.TFOOT ||
- tn === $.THEAD ||
- tn === $.TR
- ) {
- if (p.openElements.hasInTableScope($.TR)) {
- p.openElements.clearBackToTableRowContext();
- p.openElements.pop();
- p.insertionMode = IN_TABLE_BODY_MODE;
- p._processToken(token);
- }
- } else {
- startTagInTable(p, token);
- }
-}
-
-function endTagInRow(p, token) {
- const tn = token.tagName;
-
- if (tn === $.TR) {
- if (p.openElements.hasInTableScope($.TR)) {
- p.openElements.clearBackToTableRowContext();
- p.openElements.pop();
- p.insertionMode = IN_TABLE_BODY_MODE;
- }
- } else if (tn === $.TABLE) {
- if (p.openElements.hasInTableScope($.TR)) {
- p.openElements.clearBackToTableRowContext();
- p.openElements.pop();
- p.insertionMode = IN_TABLE_BODY_MODE;
- p._processToken(token);
- }
- } else if (tn === $.TBODY || tn === $.TFOOT || tn === $.THEAD) {
- if (p.openElements.hasInTableScope(tn) || p.openElements.hasInTableScope($.TR)) {
- p.openElements.clearBackToTableRowContext();
- p.openElements.pop();
- p.insertionMode = IN_TABLE_BODY_MODE;
- p._processToken(token);
- }
- } else if (
- (tn !== $.BODY && tn !== $.CAPTION && tn !== $.COL && tn !== $.COLGROUP) ||
- (tn !== $.HTML && tn !== $.TD && tn !== $.TH)
- ) {
- endTagInTable(p, token);
- }
-}
-
-// The "in cell" insertion mode
-//------------------------------------------------------------------
-function startTagInCell(p, token) {
- const tn = token.tagName;
-
- if (
- tn === $.CAPTION ||
- tn === $.COL ||
- tn === $.COLGROUP ||
- tn === $.TBODY ||
- tn === $.TD ||
- tn === $.TFOOT ||
- tn === $.TH ||
- tn === $.THEAD ||
- tn === $.TR
- ) {
- if (p.openElements.hasInTableScope($.TD) || p.openElements.hasInTableScope($.TH)) {
- p._closeTableCell();
- p._processToken(token);
- }
- } else {
- startTagInBody(p, token);
- }
-}
-
-function endTagInCell(p, token) {
- const tn = token.tagName;
-
- if (tn === $.TD || tn === $.TH) {
- if (p.openElements.hasInTableScope(tn)) {
- p.openElements.generateImpliedEndTags();
- p.openElements.popUntilTagNamePopped(tn);
- p.activeFormattingElements.clearToLastMarker();
- p.insertionMode = IN_ROW_MODE;
- }
- } else if (tn === $.TABLE || tn === $.TBODY || tn === $.TFOOT || tn === $.THEAD || tn === $.TR) {
- if (p.openElements.hasInTableScope(tn)) {
- p._closeTableCell();
- p._processToken(token);
- }
- } else if (tn !== $.BODY && tn !== $.CAPTION && tn !== $.COL && tn !== $.COLGROUP && tn !== $.HTML) {
- endTagInBody(p, token);
- }
-}
-
-// The "in select" insertion mode
-//------------------------------------------------------------------
-function startTagInSelect(p, token) {
- const tn = token.tagName;
-
- if (tn === $.HTML) {
- startTagInBody(p, token);
- } else if (tn === $.OPTION) {
- if (p.openElements.currentTagName === $.OPTION) {
- p.openElements.pop();
- }
-
- p._insertElement(token, NS.HTML);
- } else if (tn === $.OPTGROUP) {
- if (p.openElements.currentTagName === $.OPTION) {
- p.openElements.pop();
- }
-
- if (p.openElements.currentTagName === $.OPTGROUP) {
- p.openElements.pop();
- }
-
- p._insertElement(token, NS.HTML);
- } else if (tn === $.INPUT || tn === $.KEYGEN || tn === $.TEXTAREA || tn === $.SELECT) {
- if (p.openElements.hasInSelectScope($.SELECT)) {
- p.openElements.popUntilTagNamePopped($.SELECT);
- p._resetInsertionMode();
-
- if (tn !== $.SELECT) {
- p._processToken(token);
- }
- }
- } else if (tn === $.SCRIPT || tn === $.TEMPLATE) {
- startTagInHead(p, token);
- }
-}
-
-function endTagInSelect(p, token) {
- const tn = token.tagName;
-
- if (tn === $.OPTGROUP) {
- const prevOpenElement = p.openElements.items[p.openElements.stackTop - 1];
- const prevOpenElementTn = prevOpenElement && p.treeAdapter.getTagName(prevOpenElement);
-
- if (p.openElements.currentTagName === $.OPTION && prevOpenElementTn === $.OPTGROUP) {
- p.openElements.pop();
- }
-
- if (p.openElements.currentTagName === $.OPTGROUP) {
- p.openElements.pop();
- }
- } else if (tn === $.OPTION) {
- if (p.openElements.currentTagName === $.OPTION) {
- p.openElements.pop();
- }
- } else if (tn === $.SELECT && p.openElements.hasInSelectScope($.SELECT)) {
- p.openElements.popUntilTagNamePopped($.SELECT);
- p._resetInsertionMode();
- } else if (tn === $.TEMPLATE) {
- endTagInHead(p, token);
- }
-}
-
-//12.2.5.4.17 The "in select in table" insertion mode
-//------------------------------------------------------------------
-function startTagInSelectInTable(p, token) {
- const tn = token.tagName;
-
- if (
- tn === $.CAPTION ||
- tn === $.TABLE ||
- tn === $.TBODY ||
- tn === $.TFOOT ||
- tn === $.THEAD ||
- tn === $.TR ||
- tn === $.TD ||
- tn === $.TH
- ) {
- p.openElements.popUntilTagNamePopped($.SELECT);
- p._resetInsertionMode();
- p._processToken(token);
- } else {
- startTagInSelect(p, token);
- }
-}
-
-function endTagInSelectInTable(p, token) {
- const tn = token.tagName;
-
- if (
- tn === $.CAPTION ||
- tn === $.TABLE ||
- tn === $.TBODY ||
- tn === $.TFOOT ||
- tn === $.THEAD ||
- tn === $.TR ||
- tn === $.TD ||
- tn === $.TH
- ) {
- if (p.openElements.hasInTableScope(tn)) {
- p.openElements.popUntilTagNamePopped($.SELECT);
- p._resetInsertionMode();
- p._processToken(token);
- }
- } else {
- endTagInSelect(p, token);
- }
-}
-
-// The "in template" insertion mode
-//------------------------------------------------------------------
-function startTagInTemplate(p, token) {
- const tn = token.tagName;
-
- if (
- tn === $.BASE ||
- tn === $.BASEFONT ||
- tn === $.BGSOUND ||
- tn === $.LINK ||
- tn === $.META ||
- tn === $.NOFRAMES ||
- tn === $.SCRIPT ||
- tn === $.STYLE ||
- tn === $.TEMPLATE ||
- tn === $.TITLE
- ) {
- startTagInHead(p, token);
- } else {
- const newInsertionMode = TEMPLATE_INSERTION_MODE_SWITCH_MAP[tn] || IN_BODY_MODE;
-
- p._popTmplInsertionMode();
- p._pushTmplInsertionMode(newInsertionMode);
- p.insertionMode = newInsertionMode;
- p._processToken(token);
- }
-}
-
-function endTagInTemplate(p, token) {
- if (token.tagName === $.TEMPLATE) {
- endTagInHead(p, token);
- }
-}
-
-function eofInTemplate(p, token) {
- if (p.openElements.tmplCount > 0) {
- p.openElements.popUntilTagNamePopped($.TEMPLATE);
- p.activeFormattingElements.clearToLastMarker();
- p._popTmplInsertionMode();
- p._resetInsertionMode();
- p._processToken(token);
- } else {
- p.stopped = true;
- }
-}
-
-// The "after body" insertion mode
-//------------------------------------------------------------------
-function startTagAfterBody(p, token) {
- if (token.tagName === $.HTML) {
- startTagInBody(p, token);
- } else {
- tokenAfterBody(p, token);
- }
-}
-
-function endTagAfterBody(p, token) {
- if (token.tagName === $.HTML) {
- if (!p.fragmentContext) {
- p.insertionMode = AFTER_AFTER_BODY_MODE;
- }
- } else {
- tokenAfterBody(p, token);
- }
-}
-
-function tokenAfterBody(p, token) {
- p.insertionMode = IN_BODY_MODE;
- p._processToken(token);
-}
-
-// The "in frameset" insertion mode
-//------------------------------------------------------------------
-function startTagInFrameset(p, token) {
- const tn = token.tagName;
-
- if (tn === $.HTML) {
- startTagInBody(p, token);
- } else if (tn === $.FRAMESET) {
- p._insertElement(token, NS.HTML);
- } else if (tn === $.FRAME) {
- p._appendElement(token, NS.HTML);
- token.ackSelfClosing = true;
- } else if (tn === $.NOFRAMES) {
- startTagInHead(p, token);
- }
-}
-
-function endTagInFrameset(p, token) {
- if (token.tagName === $.FRAMESET && !p.openElements.isRootHtmlElementCurrent()) {
- p.openElements.pop();
-
- if (!p.fragmentContext && p.openElements.currentTagName !== $.FRAMESET) {
- p.insertionMode = AFTER_FRAMESET_MODE;
- }
- }
-}
-
-// The "after frameset" insertion mode
-//------------------------------------------------------------------
-function startTagAfterFrameset(p, token) {
- const tn = token.tagName;
-
- if (tn === $.HTML) {
- startTagInBody(p, token);
- } else if (tn === $.NOFRAMES) {
- startTagInHead(p, token);
- }
-}
-
-function endTagAfterFrameset(p, token) {
- if (token.tagName === $.HTML) {
- p.insertionMode = AFTER_AFTER_FRAMESET_MODE;
- }
-}
-
-// The "after after body" insertion mode
-//------------------------------------------------------------------
-function startTagAfterAfterBody(p, token) {
- if (token.tagName === $.HTML) {
- startTagInBody(p, token);
- } else {
- tokenAfterAfterBody(p, token);
- }
-}
-
-function tokenAfterAfterBody(p, token) {
- p.insertionMode = IN_BODY_MODE;
- p._processToken(token);
-}
-
-// The "after after frameset" insertion mode
-//------------------------------------------------------------------
-function startTagAfterAfterFrameset(p, token) {
- const tn = token.tagName;
-
- if (tn === $.HTML) {
- startTagInBody(p, token);
- } else if (tn === $.NOFRAMES) {
- startTagInHead(p, token);
- }
-}
-
-// The rules for parsing tokens in foreign content
-//------------------------------------------------------------------
-function nullCharacterInForeignContent(p, token) {
- token.chars = unicode.REPLACEMENT_CHARACTER;
- p._insertCharacters(token);
-}
-
-function characterInForeignContent(p, token) {
- p._insertCharacters(token);
- p.framesetOk = false;
-}
-
-function startTagInForeignContent(p, token) {
- if (foreignContent.causesExit(token) && !p.fragmentContext) {
- while (
- p.treeAdapter.getNamespaceURI(p.openElements.current) !== NS.HTML &&
- !p._isIntegrationPoint(p.openElements.current)
- ) {
- p.openElements.pop();
- }
-
- p._processToken(token);
- } else {
- const current = p._getAdjustedCurrentElement();
- const currentNs = p.treeAdapter.getNamespaceURI(current);
-
- if (currentNs === NS.MATHML) {
- foreignContent.adjustTokenMathMLAttrs(token);
- } else if (currentNs === NS.SVG) {
- foreignContent.adjustTokenSVGTagName(token);
- foreignContent.adjustTokenSVGAttrs(token);
- }
-
- foreignContent.adjustTokenXMLAttrs(token);
-
- if (token.selfClosing) {
- p._appendElement(token, currentNs);
- } else {
- p._insertElement(token, currentNs);
- }
-
- token.ackSelfClosing = true;
- }
-}
-
-function endTagInForeignContent(p, token) {
- for (let i = p.openElements.stackTop; i > 0; i--) {
- const element = p.openElements.items[i];
-
- if (p.treeAdapter.getNamespaceURI(element) === NS.HTML) {
- p._processToken(token);
- break;
- }
-
- if (p.treeAdapter.getTagName(element).toLowerCase() === token.tagName) {
- p.openElements.popUntilElementPopped(element);
- break;
- }
- }
-}
diff --git a/packages/parse5/lib/parser/index.test.ts b/packages/parse5/lib/parser/index.test.ts
new file mode 100644
index 0000000000000000000000000000000000000000..fdbc092c395e99b74f3a62c56251740121ad6a91
--- /dev/null
+++ b/packages/parse5/lib/parser/index.test.ts
@@ -0,0 +1,109 @@
+import * as assert from 'node:assert';
+import { parseFragment, parse } from 'parse5';
+import { jest } from '@jest/globals';
+import { generateParsingTests } from 'parse5-test-utils/utils/generate-parsing-tests.js';
+import { treeAdapters } from 'parse5-test-utils/utils/common.js';
+
+generateParsingTests(
+ 'parser',
+ 'Parser',
+ {
+ expectErrors: [
+ //TODO(GH-448): Foreign content behaviour was updated in the HTML spec.
+ //The old test suite still tests the old behaviour.
+ '269.foreign-fragment',
+ '270.foreign-fragment',
+ '307.foreign-fragment',
+ '309.foreign-fragment',
+ '316.foreign-fragment',
+ '317.foreign-fragment',
+ ],
+ },
+ (test, opts) => ({
+ node: test.fragmentContext ? parseFragment(test.fragmentContext, test.input, opts) : parse(test.input, opts),
+ })
+);
+
+generateParsingTests(
+ 'parser upstream',
+ 'Parser',
+ {
+ withoutErrors: true,
+ suitePath: new URL('../../../../test/data/html5lib-tests/tree-construction', import.meta.url),
+ expectErrors: ['505.search-element', '506.search-element'],
+ },
+ (test, opts) => ({
+ node: test.fragmentContext ? parseFragment(test.fragmentContext, test.input, opts) : parse(test.input, opts),
+ })
+);
+
+describe('parser', () => {
+ it('Regression - HTML5 Legacy Doctype Misparsed with htmlparser2 tree adapter (GH-45)', () => {
+ const html = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html><head></head><body>Hi there!</body></html>';
+ const document = parse(html, { treeAdapter: treeAdapters.htmlparser2 });
+
+ assert.ok(treeAdapters.htmlparser2.isDocumentTypeNode(document.childNodes[0]));
+ assert.strictEqual(document.childNodes[0].data, '!DOCTYPE html SYSTEM "about:legacy-compat"');
+ });
+
+ describe("Regression - Don't inherit from Object when creating collections (GH-119)", () => {
+ beforeEach(() => {
+ /*eslint-disable no-extend-native*/
+ // @ts-expect-error Adding unknown prototype method
+ Object.prototype.heyYo = 123;
+ /*eslint-enable no-extend-native*/
+ });
+
+ afterEach(() => {
+ // @ts-expect-error Deleting unknown prototype property
+ delete Object.prototype.heyYo;
+ });
+
+ it('parses correctly', () => {
+ const fragment = parseFragment('<div id="123">', {
+ treeAdapter: treeAdapters.htmlparser2,
+ });
+
+ assert.ok(treeAdapters.htmlparser2.isElementNode(fragment.childNodes[0]));
+ assert.strictEqual(treeAdapters.htmlparser2.getAttrList(fragment.childNodes[0]).length, 1);
+ });
+ });
+
+ it('Regression - DOCTYPE empty fields (GH-236)', () => {
+ const document = parse('<!DOCTYPE>');
+ const doctype = document.childNodes[0];
+
+ expect(doctype).toHaveProperty('name', '');
+ expect(doctype).toHaveProperty('publicId', '');
+ expect(doctype).toHaveProperty('systemId', '');
+ });
+
+ describe('Tree adapters', () => {
+ it('should support onItemPush and onItemPop', () => {
+ const onItemPush = jest.fn();
+ const onItemPop = jest.fn();
+ const document = parse('<p><p>', {
+ treeAdapter: {
+ ...treeAdapters.default,
+ onItemPush,
+ onItemPop,
+ },
+ });
+
+ const htmlElement = document.childNodes[0];
+ assert.ok(treeAdapters.default.isElementNode(htmlElement));
+ const bodyElement = htmlElement.childNodes[1];
+ assert.ok(treeAdapters.default.isElementNode(bodyElement));
+ // Expect 5 opened elements; in order: html, head, body, and 2x p
+ expect(onItemPush).toHaveBeenCalledTimes(5);
+ expect(onItemPush).toHaveBeenNthCalledWith(1, htmlElement);
+ expect(onItemPush).toHaveBeenNthCalledWith(3, bodyElement);
+ // The last opened element is the second p
+ expect(onItemPush).toHaveBeenLastCalledWith(bodyElement.childNodes[1]);
+ // The second p isn't closed, plus we never pop body and html. Alas, only 2 pop events (head and p).
+ expect(onItemPop).toHaveBeenCalledTimes(2);
+ // The last pop event should be the first p.
+ expect(onItemPop).toHaveBeenLastCalledWith(bodyElement.childNodes[0], bodyElement);
+ });
+ });
+});
diff --git a/packages/parse5/lib/parser/index.ts b/packages/parse5/lib/parser/index.ts
new file mode 100644
index 0000000000000000000000000000000000000000..161cf50cf60d8a8242dab37f1fff7de41444fb55
--- /dev/null
+++ b/packages/parse5/lib/parser/index.ts
@@ -0,0 +1,3584 @@
+import { Tokenizer, TokenizerMode, type TokenHandler } from '../tokenizer/index.js';
+import { OpenElementStack, type StackHandler } from './open-element-stack.js';
+import { FormattingElementList, EntryType, type ElementEntry } from './formatting-element-list.js';
+import { defaultTreeAdapter, type DefaultTreeAdapterMap } from '../tree-adapters/default.js';
+import * as doctype from '../common/doctype.js';
+import * as foreignContent from '../common/foreign-content.js';
+import { ERR, type ParserErrorHandler } from '../common/error-codes.js';
+import * as unicode from '../common/unicode.js';
+import {
+ TAG_ID as $,
+ TAG_NAMES as TN,
+ NS,
+ ATTRS,
+ SPECIAL_ELEMENTS,
+ DOCUMENT_MODE,
+ isNumberedHeader,
+ getTagID,
+} from '../common/html.js';
+import type { TreeAdapter, TreeAdapterTypeMap } from '../tree-adapters/interface.js';
+import {
+ TokenType,
+ getTokenAttr,
+ type Token,
+ type CommentToken,
+ type CharacterToken,
+ type TagToken,
+ type DoctypeToken,
+ type EOFToken,
+ type LocationWithAttributes,
+ type ElementLocation,
+} from '../common/token.js';
+
+//Misc constants
+// NOTE(review): presumably compared against an input element's `type` attribute; the usage is not visible in this part of the file.
+const HIDDEN_INPUT_TYPE = 'hidden';
+
+//Adoption agency loops iteration count
+// Iteration counts for the outer and inner loops of the adoption agency algorithm implemented further down in this file.
+const AA_OUTER_LOOP_ITER = 8;
+const AA_INNER_LOOP_ITER = 3;
+
+//Insertion modes
+/**
+ * Tree-construction state of the parser. `Parser.insertionMode` holds one of
+ * these values and the `on*` token handlers switch on it to select the
+ * per-mode handler function. Members are numeric and auto-numbered from 0.
+ */
+enum InsertionMode {
+ INITIAL,
+ BEFORE_HTML,
+ BEFORE_HEAD,
+ IN_HEAD,
+ IN_HEAD_NO_SCRIPT,
+ AFTER_HEAD,
+ IN_BODY,
+ TEXT,
+ IN_TABLE,
+ IN_TABLE_TEXT,
+ IN_CAPTION,
+ IN_COLUMN_GROUP,
+ IN_TABLE_BODY,
+ IN_ROW,
+ IN_CELL,
+ IN_SELECT,
+ IN_SELECT_IN_TABLE,
+ IN_TEMPLATE,
+ AFTER_BODY,
+ IN_FRAMESET,
+ AFTER_FRAMESET,
+ AFTER_AFTER_BODY,
+ AFTER_AFTER_FRAMESET,
+}
+
+// Placeholder location (every field -1) that `Parser._err` reports when the offending token has no `location`.
+const BASE_LOC = {
+ startLine: -1,
+ startCol: -1,
+ startOffset: -1,
+ endLine: -1,
+ endCol: -1,
+ endOffset: -1,
+};
+
+// Tag IDs for which `Parser._isElementCausesFosterParenting` answers true.
+const TABLE_STRUCTURE_TAGS = new Set([$.TABLE, $.TBODY, $.TFOOT, $.THEAD, $.TR]);
+
+export interface ParserOptions {
+    /**
+     * The [scripting flag](https://html.spec.whatwg.org/multipage/parsing.html#scripting-flag). If set
+     * to `true`, `noscript` element content will be parsed as text.
+     *
+     * @default `true`
+     */
+    scriptingEnabled?: boolean;
+
+    /**
+     * Enables source code location information. When enabled, each node (except the root node)
+     * will have a `sourceCodeLocation` property. If the node is not an empty element, `sourceCodeLocation` will
+     * be a {@link ElementLocation} object, otherwise it will be {@link Location}.
+     * If the element was implicitly created by the parser (as part of
+     * [tree correction](https://html.spec.whatwg.org/multipage/syntax.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser)),
+     * its `sourceCodeLocation` property will be `undefined`.
+     *
+     * @default `false`
+     */
+    sourceCodeLocationInfo?: boolean;
+
+    /**
+     * Specifies the resulting tree format.
+     *
+     * @default `treeAdapters.default`
+     */
+    treeAdapter?: TreeAdapter;
+
+    /**
+     * Callback for parse errors.
+     *
+     * @default `null`
+     */
+    onParseError?: ParserErrorHandler | null;
+
+    /**
+     * Opaque value that the `Parser` constructor copies onto `Parser.compileResult`
+     * when it is truthy. Optional, like every other option, so that callers doing
+     * plain HTML parsing do not have to supply it.
+     *
+     * @default `null`
+     */
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- shape is owned by the caller
+    compileResult?: any;
+
+    /**
+     * Opaque value that the `Parser` constructor copies onto `Parser.validator`
+     * when it is truthy. Optional for the same reason as `compileResult`.
+     *
+     * @default `null`
+     */
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- shape is owned by the caller
+    componentValidator?: any;
+}
+
+// Option values used for every key the caller leaves out; the `Parser` constructor and
+// `Parser.getFragmentParser` spread the caller's options over this object.
+// NOTE(review): the type annotation below reads `Required>` — its type arguments look lost; confirm against the original file.
+const defaultParserOptions: Required> = {
+ scriptingEnabled: true,
+ sourceCodeLocationInfo: false,
+ treeAdapter: defaultTreeAdapter,
+ onParseError: null,
+ compileResult: null,
+ componentValidator: null
+};
+
+//Parser
+export class Parser implements TokenHandler, StackHandler {
+ // Copied from `options.treeAdapter` by the constructor; every tree mutation goes through it.
+ treeAdapter: TreeAdapter;
+ // Copied from `options.onParseError`; when null, `_err` returns without reporting.
+ onParseError: ParserErrorHandler | null;
+ // Last start/end tag token seen (set in `onStartTag` / `onEndTag`, read in `onItemPop`).
+ private currentToken: Token | null = null;
+ // Caller options merged over `defaultParserOptions`.
+ // NOTE(review): `Required>` looks like a type whose arguments were lost; confirm.
+ public options: Required>;
+ // Document (or stand-in element for fragment parsing) that receives the parsed tree.
+ public document: T['document'];
+ // Initialised to an empty object by the constructor; not read anywhere in the visible part of the class.
+ nodeInfo: {};
+ // Set from `options.componentValidator` only when that option is truthy, otherwise left unassigned.
+ validator: any;
+ // Set from `options.compileResult` only when that option is truthy, otherwise left unassigned.
+ compileResult: any
+
+ /**
+ * Builds a parser instance.
+ *
+ * @param options Caller options; missing keys fall back to `defaultParserOptions`.
+ * @param document Node to build the tree under; a new document is created through the tree adapter when omitted.
+ * @param fragmentContext Context element for fragment parsing, or null for document parsing.
+ * @param scriptHandler Optional callback stored on the instance; it is not invoked in the visible part of the class.
+ */
+ public constructor(
+ options?: ParserOptions,
+ document?: T['document'],
+ public fragmentContext: T['element'] | null = null,
+ public scriptHandler: null | ((pendingScript: T['element']) => void) = null
+ ) {
+ // NOTE(review): the `as Required>` cast below looks like it lost its type arguments; confirm.
+ this.options = {
+ ...defaultParserOptions,
+ ...options,
+ } as Required>;
+ this.nodeInfo = {};
+ // `validator` and `compileResult` are only assigned when the matching option is truthy.
+ if(this.options.componentValidator){
+ this.validator = this.options.componentValidator;
+ }
+
+ if(this.options.compileResult){
+ this.compileResult = this.options.compileResult;
+ }
+ this.treeAdapter = this.options.treeAdapter;
+ this.onParseError = this.options.onParseError;
+
+ // Always enable location info if we report parse errors.
+ if (this.onParseError) {
+ this.options.sourceCodeLocationInfo = true;
+ }
+
+ this.document = document ?? this.treeAdapter.createDocument();
+
+ // The tokenizer has to exist before `_setContextModes` runs, because that method writes `tokenizer.inForeignNode`.
+ this.tokenizer = new Tokenizer(this.options, this);
+ this.activeFormattingElements = new FormattingElementList(this.treeAdapter);
+
+ this.fragmentContextID = fragmentContext ? getTagID(this.treeAdapter.getTagName(fragmentContext)) : $.UNKNOWN;
+ this._setContextModes(fragmentContext ?? this.document, this.fragmentContextID);
+
+ this.openElements = new OpenElementStack(this.document, this.treeAdapter, this);
+ }
+
+ // API
+ /**
+ * Parses a complete HTML string and returns the resulting document.
+ *
+ * Creates a parser, writes the whole input to its tokenizer (second argument
+ * `true`), then calls `_runParsingLoop`.
+ *
+ * NOTE(review): the value handed to `_runParsingLoop` is the tokenizer instance,
+ * while that method declares its parameter as a `Token`; confirm this is intended.
+ *
+ * @param html Markup to parse.
+ * @param options Parser options; see `ParserOptions`.
+ * @returns The parser's `document`.
+ */
+ public static parse(html: string, options?: ParserOptions): T['document'] {
+ const parser = new this(options);
+ const token = parser.tokenizer;
+ parser.tokenizer.write(html, true);
+ parser._runParsingLoop(token);
+ return parser.document;
+ }
+
+ /**
+ * Runs the post-parse checks `checkselfClosingNode` and `checkInvalid`
+ * (both defined outside this class).
+ *
+ * Despite the name there is no loop here: when the parser is not stopped,
+ * `token` is passed once to `checkselfClosingNode`, and is remembered as
+ * `lastToken` unless its type is EOF or whitespace. `checkInvalid` is always
+ * called, with `{}` when nothing was remembered.
+ *
+ * NOTE(review): the only visible caller (`parse`) passes the tokenizer object,
+ * not a token; confirm what the two check helpers expect.
+ *
+ * @param token Value forwarded to the check helpers.
+ */
+ _runParsingLoop(token: Token) {
+ let lastToken = {};
+ if (!this.stopped) {
+ if (token.type !== TokenType.EOF && token.type !== TokenType.WHITESPACE_CHARACTER) {
+ lastToken =token;
+ }
+ checkselfClosingNode(this, token);
+ }
+ checkInvalid(this, lastToken);
+ }
+
+ /**
+ * Creates a parser prepared for fragment parsing.
+ *
+ * NOTE(review): the `Required>` annotations in the body look like they lost their type arguments; confirm.
+ *
+ * @param fragmentContext Context element; a `template` element is created when none is given.
+ * @param options Parser options; missing keys fall back to `defaultParserOptions`.
+ * @returns A parser whose tokenizer state, fake root element, insertion mode and form pointer are already set up.
+ */
+ public static getFragmentParser(
+ fragmentContext?: T['parentNode'] | null,
+ options?: ParserOptions
+ ): Parser {
+ const opts: Required> = {
+ ...defaultParserOptions,
+ ...options,
+ } as Required>;
+
+ //NOTE: use a `template` element as the fragment context if no context element was provided,
+ //so we will parse in a "forgiving" manner
+ fragmentContext ??= opts.treeAdapter.createElement(TN.TEMPLATE, NS.HTML, []);
+
+ //NOTE: create a fake element which will be used as the `document` for fragment parsing.
+ //This is important for jsdom, where a new `document` cannot be created. This led to
+ //fragment parsing messing with the main `document`.
+ const documentMock = opts.treeAdapter.createElement('documentmock', NS.HTML, []);
+
+ const parser = new this(opts, documentMock, fragmentContext);
+
+ // A template context needs a matching entry on the template insertion mode stack.
+ if (parser.fragmentContextID === $.TEMPLATE) {
+ parser.tmplInsertionModeStack.unshift(InsertionMode.IN_TEMPLATE);
+ }
+
+ parser._initTokenizerForFragmentParsing();
+ parser._insertFakeRootElement();
+ parser._resetInsertionMode();
+ parser._findFormInFragmentContext();
+
+ return parser;
+ }
+
+ /**
+  * Collects the result of fragment parsing.
+  *
+  * Every child of the document's first child is moved into a freshly
+  * created document fragment.
+  *
+  * @returns The new fragment holding the parsed nodes.
+  */
+ public getFragment(): T['documentFragment'] {
+     const { treeAdapter } = this;
+     const fakeRoot = treeAdapter.getFirstChild(this.document) as T['parentNode'];
+     const result = treeAdapter.createDocumentFragment();
+
+     this._adoptNodes(fakeRoot, result);
+     return result;
+ }
+
+ // Created in the constructor with this parser as its token handler.
+ tokenizer: Tokenizer;
+
+ // When true, `_runParsingLoop` skips `checkselfClosingNode`.
+ stopped = false;
+ // Current tree-construction state; the `on*` handlers switch on it.
+ insertionMode = InsertionMode.INITIAL;
+ // Mode saved by `_switchToTextParsing` / `switchToPlaintextParsing` before entering TEXT mode.
+ originalInsertionMode = InsertionMode.INITIAL;
+
+ // Tag ID of `fragmentContext`, or `$.UNKNOWN` when parsing a whole document.
+ fragmentContextID: $;
+
+ // When set, `_resetInsertionMode` picks AFTER_HEAD rather than BEFORE_HEAD for an `html` entry.
+ headElement: null | T['element'] = null;
+ // Set by `_findFormInFragmentContext` to the nearest `form` at or above the fragment context.
+ formElement: null | T['element'] = null;
+
+ openElements: OpenElementStack;
+ activeFormattingElements: FormattingElementList;
+ /** Indicates that the current node is not an element in the HTML namespace */
+ private currentNotInHTML = false;
+
+ /**
+ * The template insertion mode stack is maintained from the left.
+ * Ie. the topmost element will always have index 0.
+ */
+ tmplInsertionModeStack: InsertionMode[] = [];
+
+ pendingCharacterTokens: CharacterToken[] = [];
+ hasNonWhitespacePendingCharacterToken = false;
+
+ framesetOk = true;
+ // When true, `onWhitespaceCharacter` drops one leading line feed from the next whitespace token.
+ skipNextNewLine = false;
+ // Gate checked by `_shouldFosterParentOnInsertion`.
+ fosterParentingEnabled = false;
+
+ //Errors
+ /**
+  * Reports a parse error through `onParseError`, if a handler is installed.
+  *
+  * @param token Token the error relates to; `BASE_LOC` is used when it has no location.
+  * @param code Error code to report.
+  * @param beforeToken When truthy, the reported end position equals the token's start position.
+  */
+ _err(token: Token, code: ERR, beforeToken?: boolean): void {
+     if (this.onParseError) {
+         const { startLine, startCol, startOffset, endLine, endCol, endOffset } = token.location ?? BASE_LOC;
+         const end = beforeToken
+             ? { endLine: startLine, endCol: startCol, endOffset: startOffset }
+             : { endLine, endCol, endOffset };
+
+         this.onParseError({ code, startLine, startCol, startOffset, ...end });
+     }
+ }
+
+ //Stack events
+ /**
+  * Stack callback: an element was pushed onto the open element stack.
+  *
+  * Forwards the node to the tree adapter's optional `onItemPush` hook and,
+  * when the node is the new stack top and not the bottom entry, recomputes
+  * the foreign-content flags for it.
+  *
+  * @param node Pushed node.
+  * @param tid Tag ID of the pushed node.
+  * @param isTop Whether the node is now the top of the stack.
+  */
+ onItemPush(node: T['parentNode'], tid: number, isTop: boolean): void {
+     this.treeAdapter.onItemPush?.(node);
+
+     if (!isTop) return;
+
+     if (this.openElements.stackTop > 0) {
+         this._setContextModes(node, tid);
+     }
+ }
+
+ /**
+  * Stack callback: an element was popped from the open element stack.
+  *
+  * Records the element's end location (when location tracking is on), calls
+  * the tree adapter's optional `onItemPop` hook with the popped node and the
+  * new current node, and, if the popped node was the stack top, recomputes
+  * the foreign-content flags for the element that is current now.
+  *
+  * @param node Popped node.
+  * @param isTop Whether the node was the top of the stack.
+  */
+ onItemPop(node: T['parentNode'], isTop: boolean): void {
+     if (this.options.sourceCodeLocationInfo) {
+         this._setEndLocation(node, this.currentToken!);
+     }
+
+     this.treeAdapter.onItemPop?.(node, this.openElements.current);
+
+     if (!isTop) return;
+
+     // With only the bottom entry left in a fragment parse, the context element plays the role of the current node.
+     if (this.openElements.stackTop === 0 && this.fragmentContext) {
+         this._setContextModes(this.fragmentContext, this.fragmentContextID);
+     } else {
+         const { current, currentTagId } = this.openElements;
+
+         this._setContextModes(current, currentTagId);
+     }
+ }
+
+ /**
+  * Updates `currentNotInHTML` and `tokenizer.inForeignNode` for a new current node.
+  *
+  * @param current Node that is now current (or the document itself).
+  * @param tid Tag ID of `current`.
+  */
+ private _setContextModes(current: T['parentNode'], tid: number): void {
+     if (current === this.document || this.treeAdapter.getNamespaceURI(current) === NS.HTML) {
+         this.currentNotInHTML = false;
+         this.tokenizer.inForeignNode = false;
+     } else {
+         this.currentNotInHTML = true;
+         // Integration points are foreign elements whose content is still tokenized as HTML.
+         this.tokenizer.inForeignNode = !this._isIntegrationPoint(tid, current);
+     }
+ }
+
+ /**
+ * Inserts an HTML element for the token, then switches the tokenizer to the
+ * given state and the parser to TEXT mode, remembering the previous
+ * insertion mode in `originalInsertionMode`.
+ *
+ * @param currentToken Start tag token to insert.
+ * @param nextTokenizerState Tokenizer state to use for the element's content.
+ */
+ _switchToTextParsing(
+ currentToken: TagToken,
+ nextTokenizerState: typeof TokenizerMode[keyof typeof TokenizerMode]
+ ): void {
+ this._insertElement(currentToken, NS.HTML);
+ this.tokenizer.state = nextTokenizerState;
+ // Save the mode before overwriting it with TEXT.
+ this.originalInsertionMode = this.insertionMode;
+ this.insertionMode = InsertionMode.TEXT;
+ }
+
+ /**
+ * Puts the parser into TEXT mode with IN_BODY recorded as the original
+ * insertion mode, and switches the tokenizer to its PLAINTEXT state.
+ */
+ switchToPlaintextParsing(): void {
+ this.insertionMode = InsertionMode.TEXT;
+ this.originalInsertionMode = InsertionMode.IN_BODY;
+ this.tokenizer.state = TokenizerMode.PLAINTEXT;
+ }
+
+ //Fragment parsing
+ /**
+  * Returns the fragment context element while only the bottom stack entry is
+  * open during fragment parsing; otherwise returns the stack's current element.
+  */
+ _getAdjustedCurrentElement(): T['element'] {
+     if (this.openElements.stackTop === 0 && this.fragmentContext) {
+         return this.fragmentContext;
+     }
+
+     return this.openElements.current;
+ }
+
+ /**
+  * Walks from the fragment context element up through its parents and stores
+  * the first `form` element found in `formElement`. Leaves `formElement`
+  * untouched when there is none.
+  */
+ _findFormInFragmentContext(): void {
+     for (let node = this.fragmentContext; node; node = this.treeAdapter.getParentNode(node)) {
+         if (this.treeAdapter.getTagName(node) === TN.FORM) {
+             this.formElement = node;
+             return;
+         }
+     }
+ }
+
+ /**
+ * Chooses the tokenizer's starting state from the fragment context element's tag.
+ *
+ * Does nothing when there is no fragment context or when the context element
+ * is not in the HTML namespace; tags not listed below leave the tokenizer
+ * state unchanged.
+ */
+ private _initTokenizerForFragmentParsing(): void {
+ if (!this.fragmentContext || this.treeAdapter.getNamespaceURI(this.fragmentContext) !== NS.HTML) {
+ return;
+ }
+
+ switch (this.fragmentContextID) {
+ case $.TITLE:
+ case $.TEXTAREA: {
+ this.tokenizer.state = TokenizerMode.RCDATA;
+ break;
+ }
+ case $.STYLE:
+ case $.XMP:
+ case $.IFRAME:
+ case $.NOEMBED:
+ case $.NOFRAMES:
+ case $.NOSCRIPT: {
+ this.tokenizer.state = TokenizerMode.RAWTEXT;
+ break;
+ }
+ case $.SCRIPT: {
+ this.tokenizer.state = TokenizerMode.SCRIPT_DATA;
+ break;
+ }
+ case $.PLAINTEXT: {
+ this.tokenizer.state = TokenizerMode.PLAINTEXT;
+ break;
+ }
+ default:
+ // Do nothing
+ }
+ }
+
+ //Tree mutation
+ /**
+  * Applies a DOCTYPE token to the document.
+  *
+  * Missing name/public id/system id are passed to the tree adapter as empty
+  * strings. When the token carries a location, that location is attached to
+  * the document's doctype child, if one can be found.
+  *
+  * @param token DOCTYPE token to apply.
+  */
+ _setDocumentType(token: DoctypeToken): void {
+     this.treeAdapter.setDocumentType(
+         this.document,
+         token.name || '',
+         token.publicId || '',
+         token.systemId || ''
+     );
+
+     if (!token.location) return;
+
+     const docTypeNode = this.treeAdapter
+         .getChildNodes(this.document)
+         .find((child) => this.treeAdapter.isDocumentTypeNode(child));
+
+     if (docTypeNode) {
+         this.treeAdapter.setNodeSourceCodeLocation(docTypeNode, token.location);
+     }
+ }
+
+ /**
+  * Places a newly created element in the tree.
+  *
+  * With location tracking on, the element's location is set first: a copy of
+  * `location` extended with `startTag`, or `location` itself when it is
+  * falsy. The element is then foster-parented when required, otherwise
+  * appended to the current node (or its template content).
+  *
+  * @param element Element to attach.
+  * @param location Location of the element's start tag, or null for synthesized elements.
+  */
+ _attachElementToTree(element: T['element'], location: LocationWithAttributes | null): void {
+     if (this.options.sourceCodeLocationInfo) {
+         const elementLoc = location ? { ...location, startTag: location } : location;
+
+         this.treeAdapter.setNodeSourceCodeLocation(element, elementLoc);
+     }
+
+     if (this._shouldFosterParentOnInsertion()) {
+         this._fosterParentElement(element);
+         return;
+     }
+
+     this.treeAdapter.appendChild(this.openElements.currentTmplContentOrNode, element);
+ }
+
+ /**
+  * Creates an element for the token and attaches it to the tree without
+  * pushing it onto the open element stack.
+  *
+  * @param token Start tag token describing the element.
+  * @param namespaceURI Namespace to create the element in.
+  */
+ _appendElement(token: TagToken, namespaceURI: NS): void {
+     this._attachElementToTree(
+         this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs),
+         token.location
+     );
+ }
+
+ /**
+ * Creates an element for the token, attaches it to the tree and pushes it
+ * onto the open element stack.
+ *
+ * @param token Start tag token describing the element.
+ * @param namespaceURI Namespace to create the element in.
+ */
+ _insertElement(token: TagToken, namespaceURI: NS): void {
+ const element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
+
+ this._attachElementToTree(element, token.location);
+ this.openElements.push(element, token.tagID);
+ }
+
+ /**
+ * Inserts an attribute-less HTML element that has no source token, and
+ * pushes it onto the open element stack. The element gets a null location.
+ *
+ * @param tagName Tag name of the element to create.
+ * @param tagID Tag ID matching `tagName`.
+ */
+ _insertFakeElement(tagName: string, tagID: $): void {
+ const element = this.treeAdapter.createElement(tagName, NS.HTML, []);
+
+ this._attachElementToTree(element, null);
+ this.openElements.push(element, tagID);
+ }
+
+ /**
+ * Inserts a `template` element together with a new document fragment as its
+ * content, and pushes the element onto the open element stack.
+ *
+ * @param token Start tag token of the template.
+ */
+ _insertTemplate(token: TagToken): void {
+ const tmpl = this.treeAdapter.createElement(token.tagName, NS.HTML, token.attrs);
+ const content = this.treeAdapter.createDocumentFragment();
+
+ this.treeAdapter.setTemplateContent(tmpl, content);
+ this._attachElementToTree(tmpl, token.location);
+ this.openElements.push(tmpl, token.tagID);
+ // The content fragment has no source position of its own, so it gets an explicit null location.
+ if (this.options.sourceCodeLocationInfo) this.treeAdapter.setNodeSourceCodeLocation(content, null);
+ }
+
+ /**
+ * Creates an `html` element with no attributes and a null location, appends
+ * it to the current stack entry and pushes it onto the open element stack.
+ * Called by `getFragmentParser` while setting up fragment parsing.
+ */
+ _insertFakeRootElement(): void {
+ const element = this.treeAdapter.createElement(TN.HTML, NS.HTML, []);
+ if (this.options.sourceCodeLocationInfo) this.treeAdapter.setNodeSourceCodeLocation(element, null);
+
+ this.treeAdapter.appendChild(this.openElements.current, element);
+ this.openElements.push(element, $.HTML);
+ }
+
+ /**
+  * Creates a comment node from the token and appends it to `parent`,
+  * recording the token's location on the node when location tracking is on.
+  *
+  * @param token Comment token supplying the text.
+  * @param parent Node that receives the comment.
+  */
+ _appendCommentNode(token: CommentToken, parent: T['parentNode']): void {
+     const { treeAdapter } = this;
+     const node = treeAdapter.createCommentNode(token.data);
+
+     treeAdapter.appendChild(parent, node);
+
+     if (!this.options.sourceCodeLocationInfo) return;
+
+     treeAdapter.setNodeSourceCodeLocation(node, token.location);
+ }
+
+ /**
+ * Inserts the token's characters as text and maintains the text node's
+ * source location.
+ *
+ * The text goes to the foster parenting location when foster parenting
+ * applies (before the table if one was found), otherwise to the current
+ * node or its template content.
+ *
+ * @param token Character token to insert.
+ */
+ _insertCharacters(token: CharacterToken): void {
+ let parent;
+ let beforeElement;
+
+ if (this._shouldFosterParentOnInsertion()) {
+ ({ parent, beforeElement } = this._findFosterParentingLocation());
+
+ if (beforeElement) {
+ this.treeAdapter.insertTextBefore(parent, token.chars, beforeElement);
+ } else {
+ this.treeAdapter.insertText(parent, token.chars);
+ }
+ } else {
+ parent = this.openElements.currentTmplContentOrNode;
+
+ this.treeAdapter.insertText(parent, token.chars);
+ }
+
+ if (!token.location) return;
+
+ // The affected text node is the sibling just before `beforeElement`, or the last child when appending.
+ const siblings = this.treeAdapter.getChildNodes(parent);
+ const textNodeIdx = beforeElement ? siblings.lastIndexOf(beforeElement) : siblings.length;
+ const textNode = siblings[textNodeIdx - 1];
+
+ //NOTE: if we have a location assigned by another token, then just update the end position
+ const tnLoc = this.treeAdapter.getNodeSourceCodeLocation(textNode);
+
+ if (tnLoc) {
+ const { endLine, endCol, endOffset } = token.location;
+ this.treeAdapter.updateNodeSourceCodeLocation(textNode, { endLine, endCol, endOffset });
+ } else if (this.options.sourceCodeLocationInfo) {
+ this.treeAdapter.setNodeSourceCodeLocation(textNode, token.location);
+ }
+ }
+
+ /**
+  * Moves every child of `donor` to the end of `recipient`, preserving order.
+  *
+  * @param donor Node whose children are taken.
+  * @param recipient Node that receives the children.
+  */
+ _adoptNodes(donor: T['parentNode'], recipient: T['parentNode']): void {
+     let child = this.treeAdapter.getFirstChild(donor);
+
+     // Each pass detaches the current first child, so the next first child is re-read from the donor.
+     while (child) {
+         this.treeAdapter.detachNode(child);
+         this.treeAdapter.appendChild(recipient, child);
+         child = this.treeAdapter.getFirstChild(donor);
+     }
+ }
+
+ /**
+  * Completes an element's source location when the element is closed.
+  *
+  * Nothing happens unless the element already has a location and the closing
+  * token carries one. If the closing token is an end tag with the element's
+  * own tag name, the element ends where that tag ends and the tag's location
+  * is stored as `endTag`. Otherwise the element was closed implicitly and
+  * ends where the closing token starts.
+  *
+  * @param element Element being closed.
+  * @param closingToken Token that caused the element to be closed.
+  */
+ _setEndLocation(element: T['element'], closingToken: Token): void {
+     if (this.treeAdapter.getNodeSourceCodeLocation(element) && closingToken.location) {
+         const ctLoc = closingToken.location;
+         const tn = this.treeAdapter.getTagName(element);
+
+         const endLoc: Partial<ElementLocation> =
+             // NOTE: For cases like `<p> <p> </p>` - the first 'p' closes without a closing
+             // tag, and for cases like `<td> <p> </td>` - 'p' closes without a closing tag.
+             closingToken.type === TokenType.END_TAG && tn === closingToken.tagName
+                 ? {
+                       endTag: { ...ctLoc },
+                       endLine: ctLoc.endLine,
+                       endCol: ctLoc.endCol,
+                       endOffset: ctLoc.endOffset,
+                   }
+                 : {
+                       endLine: ctLoc.startLine,
+                       endCol: ctLoc.startCol,
+                       endOffset: ctLoc.startOffset,
+                   };
+
+         this.treeAdapter.updateNodeSourceCodeLocation(element, endLoc);
+     }
+ }
+
+ //Token processing
+ /**
+ * Decides whether a start tag is handled by the foreign-content rules
+ * instead of the rules for the current insertion mode.
+ *
+ * @param token Start tag token about to be processed.
+ * @returns True when `startTagInForeignContent` should handle the token.
+ */
+ private shouldProcessStartTagTokenInForeignContent(token: TagToken): boolean {
+ // Check that neither current === document, or ns === NS.HTML
+ if (!this.currentNotInHTML) return false;
+
+ let current: T['parentNode'];
+ let currentTagId: number;
+
+ // With only the bottom entry open in a fragment parse, the context element stands in for the current node.
+ if (this.openElements.stackTop === 0 && this.fragmentContext) {
+ current = this.fragmentContext;
+ currentTagId = this.fragmentContextID;
+ } else {
+ ({ current, currentTagId } = this.openElements);
+ }
+
+ // An `svg` start tag directly inside a MathML `annotation-xml` element is handled by the regular rules.
+ if (
+ token.tagID === $.SVG &&
+ this.treeAdapter.getTagName(current) === TN.ANNOTATION_XML &&
+ this.treeAdapter.getNamespaceURI(current) === NS.MATHML
+ ) {
+ return false;
+ }
+
+ return (
+ // Check that `current` is not an integration point for HTML or MathML elements.
+ this.tokenizer.inForeignNode ||
+ // If it _is_ an integration point, then we might have to check that it is not an HTML
+ // integration point.
+ ((token.tagID === $.MGLYPH || token.tagID === $.MALIGNMARK) &&
+ !this._isIntegrationPoint(currentTagId, current, NS.HTML))
+ );
+ }
+
+ /**
+ * Re-dispatches an already received token to the handler for its type.
+ *
+ * Start tags go to `_processStartTag` rather than `onStartTag`, so the
+ * self-closing check in `onStartTag` is not repeated for re-processed tokens.
+ *
+ * @param token Token to process.
+ */
+ _processToken(token: Token): void {
+ switch (token.type) {
+ case TokenType.CHARACTER: {
+ this.onCharacter(token);
+ break;
+ }
+ case TokenType.NULL_CHARACTER: {
+ this.onNullCharacter(token);
+ break;
+ }
+ case TokenType.COMMENT: {
+ this.onComment(token);
+ break;
+ }
+ case TokenType.DOCTYPE: {
+ this.onDoctype(token);
+ break;
+ }
+ case TokenType.START_TAG: {
+ this._processStartTag(token);
+ break;
+ }
+ case TokenType.END_TAG: {
+ this.onEndTag(token);
+ break;
+ }
+ case TokenType.EOF: {
+ this.onEof(token);
+ break;
+ }
+ case TokenType.WHITESPACE_CHARACTER: {
+ this.onWhitespaceCharacter(token);
+ break;
+ }
+ }
+ }
+
+ //Integration points
+ /**
+  * Asks `foreignContent.isIntegrationPoint` whether the element is an
+  * integration point, passing along the element's namespace and attributes
+  * as read through the tree adapter.
+  *
+  * @param tid Tag ID of the element.
+  * @param element Element to test.
+  * @param foreignNS Optional namespace forwarded unchanged to the helper.
+  */
+ _isIntegrationPoint(tid: $, element: T['element'], foreignNS?: NS): boolean {
+     return foreignContent.isIntegrationPoint(
+         tid,
+         this.treeAdapter.getNamespaceURI(element),
+         this.treeAdapter.getAttrList(element),
+         foreignNS
+     );
+ }
+
+ //Active formatting elements reconstruction
+ /**
+  * Re-opens formatting elements that are listed as active but are no longer
+  * on the open element stack.
+  *
+  * Scanning from index 0, the entries before the first marker or still-open
+  * element are re-inserted, highest index first, and each entry is pointed
+  * at its newly created element.
+  */
+ _reconstructActiveFormattingElements(): void {
+     const list = this.activeFormattingElements;
+     const count = list.entries.length;
+
+     if (!count) return;
+
+     const stopAt = list.entries.findIndex(
+         (entry) => entry.type === EntryType.Marker || this.openElements.contains(entry.element)
+     );
+
+     // No marker and nothing open: every entry needs to be re-created.
+     const firstToReopen = stopAt < 0 ? count - 1 : stopAt - 1;
+
+     for (let idx = firstToReopen; idx >= 0; idx--) {
+         const entry = list.entries[idx] as ElementEntry;
+
+         this._insertElement(entry.token, this.treeAdapter.getNamespaceURI(entry.element));
+         entry.element = this.openElements.current;
+     }
+ }
+
+ //Close elements
+ /**
+ * Closes the current table cell: generates implied end tags, pops the stack
+ * until a table cell has been popped, clears the active formatting elements
+ * back to the last marker and switches to the IN_ROW insertion mode.
+ */
+ _closeTableCell(): void {
+ this.openElements.generateImpliedEndTags();
+ this.openElements.popUntilTableCellPopped();
+ this.activeFormattingElements.clearToLastMarker();
+ this.insertionMode = InsertionMode.IN_ROW;
+ }
+
+ /**
+ * Closes the open `p` element: generates implied end tags for everything
+ * except `p`, then pops the stack until a `p` has been popped.
+ */
+ _closePElement(): void {
+ this.openElements.generateImpliedEndTagsWithExclusion($.P);
+ this.openElements.popUntilTagNamePopped($.P);
+ }
+
+ //Insertion modes
+ /**
+ * Recomputes `insertionMode` from the open element stack.
+ *
+ * Walks the stack from the top down and stops at the first entry whose tag
+ * determines a mode. For the bottom entry of a fragment parse the fragment
+ * context's tag ID is used instead of the stack entry's. Falls back to
+ * IN_BODY when no entry matches.
+ */
+ _resetInsertionMode(): void {
+ for (let i = this.openElements.stackTop; i >= 0; i--) {
+ //Insertion mode reset map
+ switch (i === 0 && this.fragmentContext ? this.fragmentContextID : this.openElements.tagIDs[i]) {
+ case $.TR:
+ this.insertionMode = InsertionMode.IN_ROW;
+ return;
+ case $.TBODY:
+ case $.THEAD:
+ case $.TFOOT:
+ this.insertionMode = InsertionMode.IN_TABLE_BODY;
+ return;
+ case $.CAPTION:
+ this.insertionMode = InsertionMode.IN_CAPTION;
+ return;
+ case $.COLGROUP:
+ this.insertionMode = InsertionMode.IN_COLUMN_GROUP;
+ return;
+ case $.TABLE:
+ this.insertionMode = InsertionMode.IN_TABLE;
+ return;
+ case $.BODY:
+ this.insertionMode = InsertionMode.IN_BODY;
+ return;
+ case $.FRAMESET:
+ this.insertionMode = InsertionMode.IN_FRAMESET;
+ return;
+ case $.SELECT:
+ this._resetInsertionModeForSelect(i);
+ return;
+ case $.TEMPLATE:
+ this.insertionMode = this.tmplInsertionModeStack[0];
+ return;
+ case $.HTML:
+ this.insertionMode = this.headElement ? InsertionMode.AFTER_HEAD : InsertionMode.BEFORE_HEAD;
+ return;
+ // Cells and `head` only decide the mode when they are not the bottom entry.
+ case $.TD:
+ case $.TH:
+ if (i > 0) {
+ this.insertionMode = InsertionMode.IN_CELL;
+ return;
+ }
+ break;
+ case $.HEAD:
+ if (i > 0) {
+ this.insertionMode = InsertionMode.IN_HEAD;
+ return;
+ }
+ break;
+ }
+ }
+
+ this.insertionMode = InsertionMode.IN_BODY;
+ }
+
+ /**
+  * Picks the insertion mode for a `select` found at `selectIdx` on the stack.
+  *
+  * Entries below the select (down to, but excluding, index 0) are scanned:
+  * a `table` seen before any `template` selects IN_SELECT_IN_TABLE; in every
+  * other case the mode becomes IN_SELECT.
+  *
+  * @param selectIdx Stack index of the `select` element.
+  */
+ _resetInsertionModeForSelect(selectIdx: number): void {
+     let mode = InsertionMode.IN_SELECT;
+
+     for (let idx = selectIdx - 1; idx > 0; idx--) {
+         const ancestorId = this.openElements.tagIDs[idx];
+
+         if (ancestorId === $.TEMPLATE) break;
+
+         if (ancestorId === $.TABLE) {
+             mode = InsertionMode.IN_SELECT_IN_TABLE;
+             break;
+         }
+     }
+
+     this.insertionMode = mode;
+ }
+
+ //Foster parenting
+ /**
+ * Tells whether the tag ID is one of `TABLE_STRUCTURE_TAGS`
+ * (table, tbody, tfoot, thead, tr).
+ *
+ * @param tn Tag ID to test.
+ */
+ _isElementCausesFosterParenting(tn: $): boolean {
+ return TABLE_STRUCTURE_TAGS.has(tn);
+ }
+
+ /**
+  * Tells whether the next insertion has to be foster-parented: foster
+  * parenting must be enabled and the current element must be a table
+  * structure element.
+  */
+ _shouldFosterParentOnInsertion(): boolean {
+     if (!this.fosterParentingEnabled) return false;
+
+     return this._isElementCausesFosterParenting(this.openElements.currentTagId);
+ }
+
+ /**
+ * Finds where a foster-parented node has to be inserted.
+ *
+ * Scans the open element stack from the top down for the nearest HTML
+ * `template` or `table`.
+ *
+ * @returns `parent` is the node to insert into; `beforeElement` is the table
+ * to insert before, or null when the node should be appended.
+ */
+ _findFosterParentingLocation(): { parent: T['parentNode']; beforeElement: T['element'] | null } {
+ for (let i = this.openElements.stackTop; i >= 0; i--) {
+ const openElement = this.openElements.items[i];
+
+ switch (this.openElements.tagIDs[i]) {
+ case $.TEMPLATE:
+ if (this.treeAdapter.getNamespaceURI(openElement) === NS.HTML) {
+ return { parent: this.treeAdapter.getTemplateContent(openElement), beforeElement: null };
+ }
+ break;
+ case $.TABLE: {
+ const parent = this.treeAdapter.getParentNode(openElement);
+
+ if (parent) {
+ return { parent, beforeElement: openElement };
+ }
+
+ // The table has no parent: append to the stack entry just below the table instead.
+ return { parent: this.openElements.items[i - 1], beforeElement: null };
+ }
+ default:
+ // Do nothing
+ }
+ }
+
+ // Neither a template nor a table is open: append to the bottom stack entry.
+ return { parent: this.openElements.items[0], beforeElement: null };
+ }
+
+ /**
+  * Inserts an element at the foster parenting location: before the table
+  * when one was found, otherwise as the last child of the chosen parent.
+  *
+  * @param element Element to insert.
+  */
+ _fosterParentElement(element: T['element']): void {
+     const { parent, beforeElement } = this._findFosterParentingLocation();
+
+     if (beforeElement) {
+         this.treeAdapter.insertBefore(parent, element, beforeElement);
+         return;
+     }
+
+     this.treeAdapter.appendChild(parent, element);
+ }
+
+ //Special elements
+ /**
+  * Looks the tag ID up in the `SPECIAL_ELEMENTS` set for the element's namespace.
+  *
+  * @param element Element whose namespace selects the set.
+  * @param id Tag ID to look up.
+  */
+ _isSpecialElement(element: T['element'], id: $): boolean {
+     return SPECIAL_ELEMENTS[this.treeAdapter.getNamespaceURI(element)].has(id);
+ }
+
+ /**
+ * Token handler for non-whitespace character tokens.
+ *
+ * Clears `skipNextNewLine`. Inside a foreign node the token goes to
+ * `characterInForeignContent`; otherwise it is routed by insertion mode.
+ * Modes without a case ignore the token.
+ *
+ * @param token Character token from the tokenizer.
+ */
+ onCharacter(token: CharacterToken): void {
+ this.skipNextNewLine = false;
+
+ if (this.tokenizer.inForeignNode) {
+ characterInForeignContent(this, token);
+ return;
+ }
+
+ switch (this.insertionMode) {
+ case InsertionMode.INITIAL:
+ tokenInInitialMode(this, token);
+ break;
+ case InsertionMode.BEFORE_HTML:
+ tokenBeforeHtml(this, token);
+ break;
+ case InsertionMode.BEFORE_HEAD:
+ tokenBeforeHead(this, token);
+ break;
+ case InsertionMode.IN_HEAD:
+ tokenInHead(this, token);
+ break;
+ case InsertionMode.IN_HEAD_NO_SCRIPT:
+ tokenInHeadNoScript(this, token);
+ break;
+ case InsertionMode.AFTER_HEAD:
+ tokenAfterHead(this, token);
+ break;
+ case InsertionMode.IN_BODY:
+ case InsertionMode.IN_CAPTION:
+ case InsertionMode.IN_CELL:
+ case InsertionMode.IN_TEMPLATE:
+ characterInBody(this, token);
+ break;
+ case InsertionMode.TEXT:
+ case InsertionMode.IN_SELECT:
+ case InsertionMode.IN_SELECT_IN_TABLE:
+ this._insertCharacters(token);
+ break;
+ case InsertionMode.IN_TABLE:
+ case InsertionMode.IN_TABLE_BODY:
+ case InsertionMode.IN_ROW:
+ characterInTable(this, token);
+ break;
+ case InsertionMode.IN_TABLE_TEXT:
+ characterInTableText(this, token);
+ break;
+ case InsertionMode.IN_COLUMN_GROUP:
+ tokenInColumnGroup(this, token);
+ break;
+ case InsertionMode.AFTER_BODY:
+ tokenAfterBody(this, token);
+ break;
+ case InsertionMode.AFTER_AFTER_BODY:
+ tokenAfterAfterBody(this, token);
+ break;
+ default:
+ // Do nothing
+ }
+ }
+ /**
+ * Token handler for NULL character tokens.
+ *
+ * Clears `skipNextNewLine`. Inside a foreign node the token goes to
+ * `nullCharacterInForeignContent`; otherwise it is routed by insertion mode.
+ * Modes without a case (IN_BODY among them) ignore the token.
+ *
+ * @param token NULL character token from the tokenizer.
+ */
+ onNullCharacter(token: CharacterToken): void {
+ this.skipNextNewLine = false;
+
+ if (this.tokenizer.inForeignNode) {
+ nullCharacterInForeignContent(this, token);
+ return;
+ }
+
+ switch (this.insertionMode) {
+ case InsertionMode.INITIAL:
+ tokenInInitialMode(this, token);
+ break;
+ case InsertionMode.BEFORE_HTML:
+ tokenBeforeHtml(this, token);
+ break;
+ case InsertionMode.BEFORE_HEAD:
+ tokenBeforeHead(this, token);
+ break;
+ case InsertionMode.IN_HEAD:
+ tokenInHead(this, token);
+ break;
+ case InsertionMode.IN_HEAD_NO_SCRIPT:
+ tokenInHeadNoScript(this, token);
+ break;
+ case InsertionMode.AFTER_HEAD:
+ tokenAfterHead(this, token);
+ break;
+ case InsertionMode.TEXT:
+ this._insertCharacters(token);
+ break;
+ case InsertionMode.IN_TABLE:
+ case InsertionMode.IN_TABLE_BODY:
+ case InsertionMode.IN_ROW:
+ characterInTable(this, token);
+ break;
+ case InsertionMode.IN_COLUMN_GROUP:
+ tokenInColumnGroup(this, token);
+ break;
+ case InsertionMode.AFTER_BODY:
+ tokenAfterBody(this, token);
+ break;
+ case InsertionMode.AFTER_AFTER_BODY:
+ tokenAfterAfterBody(this, token);
+ break;
+ default:
+ // Do nothing
+ }
+ }
+ /**
+ * Token handler for comment tokens.
+ *
+ * Clears `skipNextNewLine`. When the current node is not in the HTML
+ * namespace the comment is appended directly; otherwise the insertion mode
+ * decides where the comment is attached. Modes without a case ignore the token.
+ *
+ * @param token Comment token from the tokenizer.
+ */
+ onComment(token: CommentToken): void {
+ this.skipNextNewLine = false;
+
+ if (this.currentNotInHTML) {
+ appendComment(this, token);
+ return;
+ }
+
+ switch (this.insertionMode) {
+ case InsertionMode.INITIAL:
+ case InsertionMode.BEFORE_HTML:
+ case InsertionMode.BEFORE_HEAD:
+ case InsertionMode.IN_HEAD:
+ case InsertionMode.IN_HEAD_NO_SCRIPT:
+ case InsertionMode.AFTER_HEAD:
+ case InsertionMode.IN_BODY:
+ case InsertionMode.IN_TABLE:
+ case InsertionMode.IN_CAPTION:
+ case InsertionMode.IN_COLUMN_GROUP:
+ case InsertionMode.IN_TABLE_BODY:
+ case InsertionMode.IN_ROW:
+ case InsertionMode.IN_CELL:
+ case InsertionMode.IN_SELECT:
+ case InsertionMode.IN_SELECT_IN_TABLE:
+ case InsertionMode.IN_TEMPLATE:
+ case InsertionMode.IN_FRAMESET:
+ case InsertionMode.AFTER_FRAMESET:
+ appendComment(this, token);
+ break;
+ case InsertionMode.IN_TABLE_TEXT:
+ tokenInTableText(this, token);
+ break;
+ case InsertionMode.AFTER_BODY:
+ appendCommentToRootHtmlElement(this, token);
+ break;
+ case InsertionMode.AFTER_AFTER_BODY:
+ case InsertionMode.AFTER_AFTER_FRAMESET:
+ appendCommentToDocument(this, token);
+ break;
+ default:
+ // Do nothing
+ }
+ }
+ /**
+ * Token handler for DOCTYPE tokens.
+ *
+ * Clears `skipNextNewLine`. Only the INITIAL mode processes the doctype; in
+ * the head-related modes listed below it is reported as misplaced, and
+ * IN_TABLE_TEXT forwards it to `tokenInTableText`. Every other mode ignores
+ * the token without reporting an error.
+ *
+ * @param token DOCTYPE token from the tokenizer.
+ */
+ onDoctype(token: DoctypeToken): void {
+ this.skipNextNewLine = false;
+ switch (this.insertionMode) {
+ case InsertionMode.INITIAL:
+ doctypeInInitialMode(this, token);
+ break;
+ case InsertionMode.BEFORE_HEAD:
+ case InsertionMode.IN_HEAD:
+ case InsertionMode.IN_HEAD_NO_SCRIPT:
+ case InsertionMode.AFTER_HEAD:
+ this._err(token, ERR.misplacedDoctype);
+ break;
+ case InsertionMode.IN_TABLE_TEXT:
+ tokenInTableText(this, token);
+ break;
+ default:
+ // Do nothing
+ }
+ }
+ /**
+ * Token handler for start tag tokens.
+ *
+ * Clears `skipNextNewLine`, records the token as `currentToken`, processes
+ * it, and afterwards reports an error if the tag was written self-closing
+ * but no handler acknowledged the self-closing flag.
+ *
+ * @param token Start tag token from the tokenizer.
+ */
+ onStartTag(token: TagToken): void {
+ this.skipNextNewLine = false;
+ this.currentToken = token;
+
+ this._processStartTag(token);
+
+ if (token.selfClosing && !token.ackSelfClosing) {
+ this._err(token, ERR.nonVoidHtmlElementStartTagWithTrailingSolidus);
+ }
+ }
+ /**
+  * Routes a start tag either to the foreign-content rules or to the rules
+  * of the current insertion mode.
+  *
+  * Unlike `onStartTag`, this method does not perform the self-closing check.
+  * Handlers that re-process a token under another insertion mode call this
+  * method, so the check runs only once per token and cannot produce false
+  * positives.
+  *
+  * @param token The token to process.
+  */
+ _processStartTag(token: TagToken): void {
+     if (this.shouldProcessStartTagTokenInForeignContent(token)) {
+         startTagInForeignContent(this, token);
+         return;
+     }
+
+     this._startTagOutsideForeignContent(token);
+ }
+ /**
+ * Routes a start tag to the handler for the current insertion mode.
+ * There is no case for TEXT mode, so a start tag received there is ignored.
+ *
+ * @param token Start tag token to process.
+ */
+ _startTagOutsideForeignContent(token: TagToken): void {
+ switch (this.insertionMode) {
+ case InsertionMode.INITIAL:
+ tokenInInitialMode(this, token);
+ break;
+ case InsertionMode.BEFORE_HTML:
+ startTagBeforeHtml(this, token);
+ break;
+ case InsertionMode.BEFORE_HEAD:
+ startTagBeforeHead(this, token);
+ break;
+ case InsertionMode.IN_HEAD:
+ startTagInHead(this, token);
+ break;
+ case InsertionMode.IN_HEAD_NO_SCRIPT:
+ startTagInHeadNoScript(this, token);
+ break;
+ case InsertionMode.AFTER_HEAD:
+ startTagAfterHead(this, token);
+ break;
+ case InsertionMode.IN_BODY:
+ startTagInBody(this, token);
+ break;
+ case InsertionMode.IN_TABLE:
+ startTagInTable(this, token);
+ break;
+ case InsertionMode.IN_TABLE_TEXT:
+ tokenInTableText(this, token);
+ break;
+ case InsertionMode.IN_CAPTION:
+ startTagInCaption(this, token);
+ break;
+ case InsertionMode.IN_COLUMN_GROUP:
+ startTagInColumnGroup(this, token);
+ break;
+ case InsertionMode.IN_TABLE_BODY:
+ startTagInTableBody(this, token);
+ break;
+ case InsertionMode.IN_ROW:
+ startTagInRow(this, token);
+ break;
+ case InsertionMode.IN_CELL:
+ startTagInCell(this, token);
+ break;
+ case InsertionMode.IN_SELECT:
+ startTagInSelect(this, token);
+ break;
+ case InsertionMode.IN_SELECT_IN_TABLE:
+ startTagInSelectInTable(this, token);
+ break;
+ case InsertionMode.IN_TEMPLATE:
+ startTagInTemplate(this, token);
+ break;
+ case InsertionMode.AFTER_BODY:
+ startTagAfterBody(this, token);
+ break;
+ case InsertionMode.IN_FRAMESET:
+ startTagInFrameset(this, token);
+ break;
+ case InsertionMode.AFTER_FRAMESET:
+ startTagAfterFrameset(this, token);
+ break;
+ case InsertionMode.AFTER_AFTER_BODY:
+ startTagAfterAfterBody(this, token);
+ break;
+ case InsertionMode.AFTER_AFTER_FRAMESET:
+ startTagAfterAfterFrameset(this, token);
+ break;
+ default:
+ // Do nothing
+ }
+ }
+ /**
+  * Token handler for end tag tokens.
+  *
+  * Clears `skipNextNewLine` and records the token as `currentToken`. The
+  * token is then handled by the foreign-content rules when the current node
+  * is outside the HTML namespace, and by the current insertion mode otherwise.
+  *
+  * @param token End tag token from the tokenizer.
+  */
+ onEndTag(token: TagToken): void {
+     this.skipNextNewLine = false;
+     this.currentToken = token;
+
+     if (!this.currentNotInHTML) {
+         this._endTagOutsideForeignContent(token);
+         return;
+     }
+
+     endTagInForeignContent(this, token);
+ }
+ /**
+ * Routes an end tag to the handler for the current insertion mode.
+ * There is no case for AFTER_AFTER_FRAMESET, so an end tag received there is ignored.
+ *
+ * @param token End tag token to process.
+ */
+ _endTagOutsideForeignContent(token: TagToken): void {
+ switch (this.insertionMode) {
+ case InsertionMode.INITIAL:
+ tokenInInitialMode(this, token);
+ break;
+ case InsertionMode.BEFORE_HTML:
+ endTagBeforeHtml(this, token);
+ break;
+ case InsertionMode.BEFORE_HEAD:
+ endTagBeforeHead(this, token);
+ break;
+ case InsertionMode.IN_HEAD:
+ endTagInHead(this, token);
+ break;
+ case InsertionMode.IN_HEAD_NO_SCRIPT:
+ endTagInHeadNoScript(this, token);
+ break;
+ case InsertionMode.AFTER_HEAD:
+ endTagAfterHead(this, token);
+ break;
+ case InsertionMode.IN_BODY:
+ endTagInBody(this, token);
+ break;
+ case InsertionMode.TEXT:
+ endTagInText(this, token);
+ break;
+ case InsertionMode.IN_TABLE:
+ endTagInTable(this, token);
+ break;
+ case InsertionMode.IN_TABLE_TEXT:
+ tokenInTableText(this, token);
+ break;
+ case InsertionMode.IN_CAPTION:
+ endTagInCaption(this, token);
+ break;
+ case InsertionMode.IN_COLUMN_GROUP:
+ endTagInColumnGroup(this, token);
+ break;
+ case InsertionMode.IN_TABLE_BODY:
+ endTagInTableBody(this, token);
+ break;
+ case InsertionMode.IN_ROW:
+ endTagInRow(this, token);
+ break;
+ case InsertionMode.IN_CELL:
+ endTagInCell(this, token);
+ break;
+ case InsertionMode.IN_SELECT:
+ endTagInSelect(this, token);
+ break;
+ case InsertionMode.IN_SELECT_IN_TABLE:
+ endTagInSelectInTable(this, token);
+ break;
+ case InsertionMode.IN_TEMPLATE:
+ endTagInTemplate(this, token);
+ break;
+ case InsertionMode.AFTER_BODY:
+ endTagAfterBody(this, token);
+ break;
+ case InsertionMode.IN_FRAMESET:
+ endTagInFrameset(this, token);
+ break;
+ case InsertionMode.AFTER_FRAMESET:
+ endTagAfterFrameset(this, token);
+ break;
+ case InsertionMode.AFTER_AFTER_BODY:
+ tokenAfterAfterBody(this, token);
+ break;
+ default:
+ // Do nothing
+ }
+ }
+ /**
+ * Token handler for the end-of-file token.
+ *
+ * Routes the token by insertion mode; every mode of the enum has a case
+ * here. Unlike the other handlers, `skipNextNewLine` is left as is.
+ *
+ * @param token EOF token from the tokenizer.
+ */
+ onEof(token: EOFToken): void {
+ switch (this.insertionMode) {
+ case InsertionMode.INITIAL:
+ tokenInInitialMode(this, token);
+ break;
+ case InsertionMode.BEFORE_HTML:
+ tokenBeforeHtml(this, token);
+ break;
+ case InsertionMode.BEFORE_HEAD:
+ tokenBeforeHead(this, token);
+ break;
+ case InsertionMode.IN_HEAD:
+ tokenInHead(this, token);
+ break;
+ case InsertionMode.IN_HEAD_NO_SCRIPT:
+ tokenInHeadNoScript(this, token);
+ break;
+ case InsertionMode.AFTER_HEAD:
+ tokenAfterHead(this, token);
+ break;
+ case InsertionMode.IN_BODY:
+ case InsertionMode.IN_TABLE:
+ case InsertionMode.IN_CAPTION:
+ case InsertionMode.IN_COLUMN_GROUP:
+ case InsertionMode.IN_TABLE_BODY:
+ case InsertionMode.IN_ROW:
+ case InsertionMode.IN_CELL:
+ case InsertionMode.IN_SELECT:
+ case InsertionMode.IN_SELECT_IN_TABLE:
+ eofInBody(this, token);
+ break;
+ case InsertionMode.TEXT:
+ eofInText(this, token);
+ break;
+ case InsertionMode.IN_TABLE_TEXT:
+ tokenInTableText(this, token);
+ break;
+ case InsertionMode.IN_TEMPLATE:
+ eofInTemplate(this, token);
+ break;
+ case InsertionMode.AFTER_BODY:
+ case InsertionMode.IN_FRAMESET:
+ case InsertionMode.AFTER_FRAMESET:
+ case InsertionMode.AFTER_AFTER_BODY:
+ case InsertionMode.AFTER_AFTER_FRAMESET:
+ stopParsing(this, token);
+ break;
+ default:
+ // Do nothing
+ }
+ }
+ // Handles a whitespace character token.
+ //
+ // When `skipNextNewLine` is set, the flag is cleared and a single leading LINE FEED is
+ // dropped from the token (the whole token is ignored if it consists of just that LF).
+ // Tokens inside a foreign node are inserted directly; everything else is dispatched on
+ // the current insertion mode. Modes without an entry below ignore the token.
+ onWhitespaceCharacter(token: CharacterToken): void {
+     if (this.skipNextNewLine) {
+         this.skipNextNewLine = false;
+
+         if (token.chars.charCodeAt(0) === unicode.CODE_POINTS.LINE_FEED) {
+             if (token.chars.length === 1) {
+                 return;
+             }
+
+             // `String.prototype.substr` is a deprecated legacy method; `slice(1)` yields the
+             // same result for dropping the first character.
+             token.chars = token.chars.slice(1);
+         }
+     }
+
+     if (this.tokenizer.inForeignNode) {
+         this._insertCharacters(token);
+         return;
+     }
+
+     switch (this.insertionMode) {
+         // Plain insertion, without reconstructing the active formatting elements.
+         case InsertionMode.IN_HEAD:
+         case InsertionMode.IN_HEAD_NO_SCRIPT:
+         case InsertionMode.AFTER_HEAD:
+         case InsertionMode.TEXT:
+         case InsertionMode.IN_COLUMN_GROUP:
+         case InsertionMode.IN_SELECT:
+         case InsertionMode.IN_SELECT_IN_TABLE:
+         case InsertionMode.IN_FRAMESET:
+         case InsertionMode.AFTER_FRAMESET: {
+             this._insertCharacters(token);
+             break;
+         }
+         // Same handling as whitespace in the "in body" mode.
+         case InsertionMode.IN_BODY:
+         case InsertionMode.IN_CAPTION:
+         case InsertionMode.IN_CELL:
+         case InsertionMode.IN_TEMPLATE:
+         case InsertionMode.AFTER_BODY:
+         case InsertionMode.AFTER_AFTER_BODY:
+         case InsertionMode.AFTER_AFTER_FRAMESET: {
+             whitespaceCharacterInBody(this, token);
+             break;
+         }
+         case InsertionMode.IN_TABLE:
+         case InsertionMode.IN_TABLE_BODY:
+         case InsertionMode.IN_ROW: {
+             characterInTable(this, token);
+             break;
+         }
+         case InsertionMode.IN_TABLE_TEXT: {
+             whitespaceCharacterInTableText(this, token);
+             break;
+         }
+         default:
+         // Do nothing
+     }
+ }
+}
+
+ //Adoption agency algorithm
+ //(see: https://html.spec.whatwg.org/multipage/parsing.html#adoption-agency-algorithm)
+ //------------------------------------------------------------------
+
+//Steps 5-8 of the algorithm
+ // Finds the active formatting entry for the token's tag name and validates it against
+ // the stack of open elements.
+ //
+ // Returns the entry only when its element is still on the open-element stack and the tag
+ // is in scope. When no entry exists at all, the token is handed to `genericEndTagInBody`.
+ function aaObtainFormattingElementEntry(
+     p: Parser,
+     token: TagToken
+ ): ElementEntry | null {
+     const entry = p.activeFormattingElements.getElementEntryInScopeWithTagName(token.tagName);
+
+     if (!entry) {
+         genericEndTagInBody(p, token);
+         // Hand back the (empty) lookup result unchanged.
+         return entry;
+     }
+
+     if (!p.openElements.contains(entry.element)) {
+         // The element is no longer open: drop the stale entry.
+         p.activeFormattingElements.removeEntry(entry);
+         return null;
+     }
+
+     return p.openElements.hasInScope(token.tagID) ? entry : null;
+ }
+
+//Steps 9 and 10 of the algorithm
+ // Walks the open-element stack from the top down to the formatting element and returns
+ // the last special element seen on the way (i.e. the one closest to the formatting
+ // element), or null if there is none.
+ //
+ // When no such element exists, the stack is shortened to the formatting element's index
+ // (0 if it was not found) and the entry is removed from the active formatting elements.
+ //
+ // NOTE(review): `T` is not declared in this signature as shown; presumably a generic
+ // type parameter is expected here - confirm.
+ function aaObtainFurthestBlock(
+     p: Parser,
+     formattingElementEntry: ElementEntry
+ ): T['parentNode'] | null {
+     let furthestBlock = null;
+     let idx = p.openElements.stackTop;
+
+     while (idx >= 0) {
+         const candidate = p.openElements.items[idx];
+
+         if (candidate === formattingElementEntry.element) {
+             break;
+         }
+
+         if (p._isSpecialElement(candidate, p.openElements.tagIDs[idx])) {
+             furthestBlock = candidate;
+         }
+
+         idx--;
+     }
+
+     if (!furthestBlock) {
+         // `idx` is -1 when the formatting element was never reached.
+         p.openElements.shortenToLength(Math.max(idx, 0));
+         p.activeFormattingElements.removeEntry(formattingElementEntry);
+     }
+
+     return furthestBlock;
+ }
+
+//Step 13 of the algorithm
+ // Walks from the furthest block towards the formatting element along the open-element
+ // stack (via `getCommonAncestor`) and returns the last node that was re-parented.
+ //
+ // For each visited element:
+ //  - no active formatting entry: the element is removed from the open-element stack;
+ //  - entry present but the iteration counter reached AA_INNER_LOOP_ITER: the entry is
+ //    removed as well as the element;
+ //  - otherwise the element is recreated from its entry and the previous `lastElement`
+ //    is moved under the recreated element.
+ //
+ // NOTE(review): `T` is not declared in this signature as shown; presumably a generic
+ // type parameter is expected here - confirm.
+ function aaInnerLoop(
+     p: Parser,
+     furthestBlock: T['element'],
+     formattingElement: T['element']
+ ): T['element'] {
+     let lastElement = furthestBlock;
+     let current = p.openElements.getCommonAncestor(furthestBlock) as T['element'];
+     let iteration = 0;
+
+     while (current !== formattingElement) {
+         //NOTE: look up the following element first, because `current` may be removed from
+         //the stack (or replaced on it) below.
+         const upcoming = p.openElements.getCommonAncestor(current) as T['element'];
+         const entry = p.activeFormattingElements.getElementEntry(current);
+
+         if (!entry) {
+             p.openElements.remove(current);
+         } else if (iteration >= AA_INNER_LOOP_ITER) {
+             p.activeFormattingElements.removeEntry(entry);
+             p.openElements.remove(current);
+         } else {
+             const recreated = aaRecreateElementFromEntry(p, entry);
+
+             if (lastElement === furthestBlock) {
+                 p.activeFormattingElements.bookmark = entry;
+             }
+
+             p.treeAdapter.detachNode(lastElement);
+             p.treeAdapter.appendChild(recreated, lastElement);
+             lastElement = recreated;
+         }
+
+         iteration++;
+         current = upcoming;
+     }
+
+     return lastElement;
+ }
+
+//Step 13.7 of the algorithm
+ // Creates a fresh element from the entry's token (same tag name, namespace and attrs),
+ // swaps it in for the old element on the open-element stack, updates the entry to point
+ // at it, and returns the new element.
+ //
+ // NOTE(review): `T` is not declared in this signature as shown; presumably a generic
+ // type parameter is expected here - confirm.
+ function aaRecreateElementFromEntry(
+     p: Parser,
+     elementEntry: ElementEntry
+ ): T['element'] {
+     const { element: staleElement, token } = elementEntry;
+     const namespace = p.treeAdapter.getNamespaceURI(staleElement);
+     const replacement = p.treeAdapter.createElement(token.tagName, namespace, token.attrs);
+
+     p.openElements.replace(staleElement, replacement);
+     elementEntry.element = replacement;
+
+     return replacement;
+ }
+
+//Step 14 of the algorithm
+ // Places `lastElement` relative to the common ancestor: it is foster-parented when the
+ // ancestor's tag causes foster parenting, appended to the template content when the
+ // ancestor is an HTML <template>, and appended to the ancestor itself otherwise.
+ //
+ // NOTE(review): `T` is not declared in this signature as shown; presumably a generic
+ // type parameter is expected here - confirm.
+ function aaInsertLastNodeInCommonAncestor(
+     p: Parser,
+     commonAncestor: T['parentNode'],
+     lastElement: T['element']
+ ): void {
+     const ancestorTagId = getTagID(p.treeAdapter.getTagName(commonAncestor));
+
+     if (p._isElementCausesFosterParenting(ancestorTagId)) {
+         p._fosterParentElement(lastElement);
+         return;
+     }
+
+     const ancestorNs = p.treeAdapter.getNamespaceURI(commonAncestor);
+     const isHtmlTemplate = ancestorTagId === $.TEMPLATE && ancestorNs === NS.HTML;
+     const target = isHtmlTemplate ? p.treeAdapter.getTemplateContent(commonAncestor) : commonAncestor;
+
+     p.treeAdapter.appendChild(target, lastElement);
+ }
+
+//Steps 15-19 of the algorithm
+ // Builds a replacement for the formatting element from its entry's token, moves the
+ // furthest block's children under it (`_adoptNodes`), appends it to the furthest block,
+ // and then swaps the old element/entry for the new one in both the active formatting
+ // elements (after the bookmark) and the open-element stack (right after the furthest block).
+ //
+ // NOTE(review): `T` is not declared in this signature as shown; presumably a generic
+ // type parameter is expected here - confirm.
+ function aaReplaceFormattingElement(
+     p: Parser,
+     furthestBlock: T['parentNode'],
+     formattingElementEntry: ElementEntry
+ ): void {
+     const { token } = formattingElementEntry;
+     const namespace = p.treeAdapter.getNamespaceURI(formattingElementEntry.element);
+     const replacement = p.treeAdapter.createElement(token.tagName, namespace, token.attrs);
+
+     // Tree: the replacement takes over the furthest block's children and becomes its child.
+     p._adoptNodes(furthestBlock, replacement);
+     p.treeAdapter.appendChild(furthestBlock, replacement);
+
+     // Active formatting elements: new entry in, old entry out.
+     p.activeFormattingElements.insertElementAfterBookmark(replacement, token);
+     p.activeFormattingElements.removeEntry(formattingElementEntry);
+
+     // Open elements: old element out, replacement in after the furthest block.
+     p.openElements.remove(formattingElementEntry.element);
+     p.openElements.insertAfter(furthestBlock, replacement, token.tagID);
+ }
+
+//Algorithm entry point
+ // Runs the adoption agency steps for `token`, at most AA_OUTER_LOOP_ITER times.
+ // A round ends the whole run as soon as no formatting element entry or no furthest
+ // block can be obtained.
+ function callAdoptionAgency(p: Parser, token: TagToken): void {
+     for (let round = 0; round < AA_OUTER_LOOP_ITER; round++) {
+         const entry = aaObtainFormattingElementEntry(p, token);
+
+         if (!entry) {
+             return;
+         }
+
+         const furthestBlock = aaObtainFurthestBlock(p, entry);
+
+         if (!furthestBlock) {
+             return;
+         }
+
+         p.activeFormattingElements.bookmark = entry;
+
+         const lastElement = aaInnerLoop(p, furthestBlock, entry.element);
+         const commonAncestor = p.openElements.getCommonAncestor(entry.element);
+
+         p.treeAdapter.detachNode(lastElement);
+
+         if (commonAncestor) {
+             aaInsertLastNodeInCommonAncestor(p, commonAncestor, lastElement);
+         }
+
+         aaReplaceFormattingElement(p, furthestBlock, entry);
+     }
+ }
+
+//Generic token handlers
+//------------------------------------------------------------------
+ // Appends the comment to `openElements.currentTmplContentOrNode`.
+ function appendComment(p: Parser, token: CommentToken): void {
+     const parent = p.openElements.currentTmplContentOrNode;
+
+     p._appendCommentNode(token, parent);
+ }
+
+ // Appends the comment to the bottom-most entry (index 0) of the open-element stack.
+ function appendCommentToRootHtmlElement(p: Parser, token: CommentToken): void {
+     const rootElement = p.openElements.items[0];
+
+     p._appendCommentNode(token, rootElement);
+ }
+
+ // Appends the comment directly to the document node.
+ function appendCommentToDocument(p: Parser, token: CommentToken): void {
+     const { document } = p;
+
+     p._appendCommentNode(token, document);
+ }
+
+ // Marks the parser as stopped and, when the EOF token carries location info, sets end
+ // locations for the elements still on the open-element stack.
+ function stopParsing(p: Parser, token: EOFToken): void {
+     p.stopped = true;
+
+     if (!token.location) {
+         return;
+     }
+
+     // NOTE: Outside of fragment parsing, `html` and `body` stay on the stack (indices 0 and 1).
+     // They are skipped here so that an end position they already have is not overwritten.
+     const lowestIndex = p.fragmentContext ? 0 : 2;
+
+     for (let i = p.openElements.stackTop; i >= lowestIndex; i--) {
+         p._setEndLocation(p.openElements.items[i], token);
+     }
+
+     // `html` and `body` only need handling when parsing a full document with a non-empty stack.
+     if (p.fragmentContext || p.openElements.stackTop < 0) {
+         return;
+     }
+
+     const htmlElement = p.openElements.items[0];
+     const htmlLocation = p.treeAdapter.getNodeSourceCodeLocation(htmlElement);
+
+     // Nothing to do if `html` has no location or already has an end tag recorded.
+     if (!htmlLocation || htmlLocation.endTag) {
+         return;
+     }
+
+     p._setEndLocation(htmlElement, token);
+
+     if (p.openElements.stackTop < 1) {
+         return;
+     }
+
+     const bodyElement = p.openElements.items[1];
+     const bodyLocation = p.treeAdapter.getNodeSourceCodeLocation(bodyElement);
+
+     if (bodyLocation && !bodyLocation.endTag) {
+         p._setEndLocation(bodyElement, token);
+     }
+ }
+
+// The "initial" insertion mode
+//------------------------------------------------------------------
+ // Handles a DOCTYPE token in the "initial" mode: records the document type, reports a
+ // non-conforming doctype, sets the document mode and moves on to "before html".
+ function doctypeInInitialMode(p: Parser, token: DoctypeToken): void {
+     p._setDocumentType(token);
+
+     // A forced-quirks token always yields quirks mode; otherwise the mode is derived from the token.
+     const documentMode = !token.forceQuirks ? doctype.getDocumentMode(token) : DOCUMENT_MODE.QUIRKS;
+     const conforming = doctype.isConforming(token);
+
+     if (!conforming) {
+         p._err(token, ERR.nonConformingDoctype);
+     }
+
+     p.treeAdapter.setDocumentMode(p.document, documentMode);
+     p.insertionMode = InsertionMode.BEFORE_HTML;
+ }
+
+ // Handles any other token in the "initial" mode: reports the missing doctype, puts the
+ // document into quirks mode, switches to "before html" and reprocesses the token.
+ function tokenInInitialMode(p: Parser, token: Token): void {
+     p._err(token, ERR.missingDoctype, true);
+
+     const { document } = p;
+     p.treeAdapter.setDocumentMode(document, DOCUMENT_MODE.QUIRKS);
+
+     p.insertionMode = InsertionMode.BEFORE_HTML;
+     p._processToken(token);
+ }
+
+// The "before html" insertion mode
+//------------------------------------------------------------------
+ // Start tag in "before html": an <html> tag is inserted and the mode advances to
+ // "before head"; any other tag goes through the generic `tokenBeforeHtml` path.
+ function startTagBeforeHtml(p: Parser, token: TagToken): void {
+     if (token.tagID !== $.HTML) {
+         tokenBeforeHtml(p, token);
+         return;
+     }
+
+     p._insertElement(token, NS.HTML);
+     p.insertionMode = InsertionMode.BEFORE_HEAD;
+ }
+
+ // End tag in "before html": only </html>, </head>, </body> and </br> are processed
+ // (through `tokenBeforeHtml`); every other end tag is dropped.
+ function endTagBeforeHtml(p: Parser, token: TagToken): void {
+     switch (token.tagID) {
+         case $.HTML:
+         case $.HEAD:
+         case $.BODY:
+         case $.BR:
+             tokenBeforeHtml(p, token);
+             break;
+         default:
+         // Ignore token
+     }
+ }
+
+ // Generic "before html" handling: inserts a fake root element, advances to
+ // "before head" and reprocesses the token in the new mode.
+ function tokenBeforeHtml(p: Parser, token: Token): void {
+     p._insertFakeRootElement();
+
+     p.insertionMode = InsertionMode.BEFORE_HEAD;
+
+     p._processToken(token);
+ }
+
+// The "before head" insertion mode
+//------------------------------------------------------------------
+ // Start tag in "before head": <html> is handled as in body, <head> is inserted and
+ // remembered as the head element, anything else takes the generic `tokenBeforeHead` path.
+ function startTagBeforeHead(p: Parser, token: TagToken): void {
+     const id = token.tagID;
+
+     if (id === $.HTML) {
+         startTagInBody(p, token);
+     } else if (id === $.HEAD) {
+         p._insertElement(token, NS.HTML);
+         p.headElement = p.openElements.current;
+         p.insertionMode = InsertionMode.IN_HEAD;
+     } else {
+         tokenBeforeHead(p, token);
+     }
+ }
+
+ // End tag in "before head": </head>, </body>, </html> and </br> take the generic
+ // `tokenBeforeHead` path; any other end tag is reported as having no matching open element.
+ function endTagBeforeHead(p: Parser, token: TagToken): void {
+     switch (token.tagID) {
+         case $.HEAD:
+         case $.BODY:
+         case $.HTML:
+         case $.BR:
+             tokenBeforeHead(p, token);
+             break;
+         default:
+             p._err(token, ERR.endTagWithoutMatchingOpenElement);
+     }
+ }
+
+ // Generic "before head" handling: inserts a fake <head>, remembers it as the head
+ // element, switches to "in head" and reprocesses the token.
+ function tokenBeforeHead(p: Parser, token: Token): void {
+     p._insertFakeElement(TN.HEAD, $.HEAD);
+
+     const insertedHead = p.openElements.current;
+     p.headElement = insertedHead;
+
+     p.insertionMode = InsertionMode.IN_HEAD;
+     p._processToken(token);
+ }
+
+// The "in head" insertion mode
+//------------------------------------------------------------------
+ // Start tag in the "in head" mode, dispatched on the tag ID. Tags without a dedicated
+ // case take the generic `tokenInHead` path.
+ function startTagInHead(p: Parser, token: TagToken): void {
+     switch (token.tagID) {
+         case $.HTML:
+             startTagInBody(p, token);
+             return;
+
+         // Appended via `_appendElement`; the self-closing flag is acknowledged.
+         case $.BASE:
+         case $.BASEFONT:
+         case $.BGSOUND:
+         case $.LINK:
+         case $.META:
+             p._appendElement(token, NS.HTML);
+             token.ackSelfClosing = true;
+             return;
+
+         case $.TITLE:
+             p._switchToTextParsing(token, TokenizerMode.RCDATA);
+             return;
+
+         // With scripting enabled the content is parsed as raw text; otherwise the element
+         // is inserted and parsing continues in "in head no script".
+         case $.NOSCRIPT:
+             if (p.options.scriptingEnabled) {
+                 p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
+             } else {
+                 p._insertElement(token, NS.HTML);
+                 p.insertionMode = InsertionMode.IN_HEAD_NO_SCRIPT;
+             }
+             return;
+
+         case $.NOFRAMES:
+         case $.STYLE:
+             p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
+             return;
+
+         case $.SCRIPT:
+             p._switchToTextParsing(token, TokenizerMode.SCRIPT_DATA);
+             return;
+
+         // Inserts the template, adds a formatting marker and pushes "in template" onto the
+         // front of the template insertion mode stack.
+         case $.TEMPLATE:
+             p._insertTemplate(token);
+             p.activeFormattingElements.insertMarker();
+             p.framesetOk = false;
+             p.insertionMode = InsertionMode.IN_TEMPLATE;
+             p.tmplInsertionModeStack.unshift(InsertionMode.IN_TEMPLATE);
+             return;
+
+         case $.HEAD:
+             p._err(token, ERR.misplacedStartTagForHeadElement);
+             return;
+
+         default:
+             tokenInHead(p, token);
+     }
+ }
+
+ // End tag in the "in head" mode.
+ //  - </head> pops the current element and moves to "after head";
+ //  - </body>, </br>, </html> take the generic `tokenInHead` path;
+ //  - </template> closes the innermost open template, if any;
+ //  - everything else (including </template> with no open template) is reported as an
+ //    end tag without a matching open element.
+ function endTagInHead(p: Parser, token: TagToken): void {
+     const id = token.tagID;
+
+     if (id === $.HEAD) {
+         p.openElements.pop();
+         p.insertionMode = InsertionMode.AFTER_HEAD;
+         return;
+     }
+
+     if (id === $.BODY || id === $.BR || id === $.HTML) {
+         tokenInHead(p, token);
+         return;
+     }
+
+     if (id !== $.TEMPLATE || p.openElements.tmplCount <= 0) {
+         p._err(token, ERR.endTagWithoutMatchingOpenElement);
+         return;
+     }
+
+     // </template> with at least one template on the stack.
+     p.openElements.generateImpliedEndTagsThoroughly();
+
+     if (p.openElements.currentTagId !== $.TEMPLATE) {
+         p._err(token, ERR.closingOfElementWithOpenChildElements);
+     }
+
+     p.openElements.popUntilTagNamePopped($.TEMPLATE);
+     p.activeFormattingElements.clearToLastMarker();
+     p.tmplInsertionModeStack.shift();
+     p._resetInsertionMode();
+ }
+
+ // Generic "in head" handling: pops the current element, switches to "after head" and
+ // reprocesses the token there.
+ function tokenInHead(p: Parser, token: Token): void {
+     p.openElements.pop();
+
+     p.insertionMode = InsertionMode.AFTER_HEAD;
+
+     p._processToken(token);
+ }
+
+// The "in head no script" insertion mode
+//------------------------------------------------------------------
+ // Start tag in the "in head no script" mode: <html> is handled as in body, a fixed set
+ // of head-level tags is handled as in head, a nested <noscript> is reported as an error,
+ // and anything else takes the generic `tokenInHeadNoScript` path.
+ function startTagInHeadNoScript(p: Parser, token: TagToken): void {
+     switch (token.tagID) {
+         case $.HTML:
+             startTagInBody(p, token);
+             return;
+
+         case $.BASEFONT:
+         case $.BGSOUND:
+         case $.HEAD:
+         case $.LINK:
+         case $.META:
+         case $.NOFRAMES:
+         case $.STYLE:
+             startTagInHead(p, token);
+             return;
+
+         case $.NOSCRIPT:
+             p._err(token, ERR.nestedNoscriptInHead);
+             return;
+
+         default:
+             tokenInHeadNoScript(p, token);
+     }
+ }
+
+ // End tag in the "in head no script" mode: </noscript> pops the current element and
+ // returns to "in head", </br> takes the generic path, and any other end tag is reported
+ // as having no matching open element.
+ function endTagInHeadNoScript(p: Parser, token: TagToken): void {
+     const id = token.tagID;
+
+     if (id === $.NOSCRIPT) {
+         p.openElements.pop();
+         p.insertionMode = InsertionMode.IN_HEAD;
+     } else if (id === $.BR) {
+         tokenInHeadNoScript(p, token);
+     } else {
+         p._err(token, ERR.endTagWithoutMatchingOpenElement);
+     }
+ }
+
+ // Generic "in head no script" handling: reports an error (a different code for EOF than
+ // for other tokens), pops the current element, returns to "in head" and reprocesses the token.
+ function tokenInHeadNoScript(p: Parser, token: Token): void {
+     const isEof = token.type === TokenType.EOF;
+
+     p._err(token, isEof ? ERR.openElementsLeftAfterEof : ERR.disallowedContentInNoscriptInHead);
+
+     p.openElements.pop();
+     p.insertionMode = InsertionMode.IN_HEAD;
+     p._processToken(token);
+ }
+
+// The "after head" insertion mode
+//------------------------------------------------------------------
+ // Start tag in the "after head" mode, dispatched on the tag ID. Tags without a
+ // dedicated case take the generic `tokenAfterHead` path.
+ function startTagAfterHead(p: Parser, token: TagToken): void {
+     switch (token.tagID) {
+         case $.HTML:
+             startTagInBody(p, token);
+             return;
+
+         case $.BODY:
+             p._insertElement(token, NS.HTML);
+             p.framesetOk = false;
+             p.insertionMode = InsertionMode.IN_BODY;
+             return;
+
+         case $.FRAMESET:
+             p._insertElement(token, NS.HTML);
+             p.insertionMode = InsertionMode.IN_FRAMESET;
+             return;
+
+         // Head-level tags after </head>: report the error, temporarily put the head element
+         // back on the stack, handle the tag as in head, then remove the head element again.
+         case $.BASE:
+         case $.BASEFONT:
+         case $.BGSOUND:
+         case $.LINK:
+         case $.META:
+         case $.NOFRAMES:
+         case $.SCRIPT:
+         case $.STYLE:
+         case $.TEMPLATE:
+         case $.TITLE:
+             p._err(token, ERR.abandonedHeadElementChild);
+             p.openElements.push(p.headElement!, $.HEAD);
+             startTagInHead(p, token);
+             p.openElements.remove(p.headElement!);
+             return;
+
+         case $.HEAD:
+             p._err(token, ERR.misplacedStartTagForHeadElement);
+             return;
+
+         default:
+             tokenAfterHead(p, token);
+     }
+ }
+
+ // End tag in the "after head" mode: </body>, </html> and </br> take the generic
+ // `tokenAfterHead` path, </template> is handled as in head, and any other end tag is
+ // reported as having no matching open element.
+ function endTagAfterHead(p: Parser, token: TagToken): void {
+     const id = token.tagID;
+
+     if (id === $.BODY || id === $.HTML || id === $.BR) {
+         tokenAfterHead(p, token);
+     } else if (id === $.TEMPLATE) {
+         endTagInHead(p, token);
+     } else {
+         p._err(token, ERR.endTagWithoutMatchingOpenElement);
+     }
+ }
+
+ // Generic "after head" handling: inserts a fake <body>, switches to "in body" and
+ // handles the token with the "in body" rules.
+ function tokenAfterHead(p: Parser, token: Token): void {
+     p._insertFakeElement(TN.BODY, $.BODY);
+
+     p.insertionMode = InsertionMode.IN_BODY;
+
+     modeInBody(p, token);
+ }
+
+// The "in body" insertion mode
+//------------------------------------------------------------------
+ // Applies the "in body" rules to a token of any type by dispatching on `token.type`.
+ // Token types without a case below are ignored.
+ function modeInBody(p: Parser, token: Token): void {
+     switch (token.type) {
+         case TokenType.CHARACTER:
+             characterInBody(p, token);
+             return;
+
+         case TokenType.WHITESPACE_CHARACTER:
+             whitespaceCharacterInBody(p, token);
+             return;
+
+         case TokenType.COMMENT:
+             appendComment(p, token);
+             return;
+
+         case TokenType.START_TAG:
+             startTagInBody(p, token);
+             return;
+
+         case TokenType.END_TAG:
+             endTagInBody(p, token);
+             return;
+
+         case TokenType.EOF:
+             eofInBody(p, token);
+             return;
+
+         default:
+         // Do nothing
+     }
+ }
+
+ // Whitespace in body: reconstructs the active formatting elements, then inserts the
+ // characters. Unlike `characterInBody`, `framesetOk` is left untouched.
+ function whitespaceCharacterInBody(p: Parser, token: CharacterToken): void {
+     p._reconstructActiveFormattingElements();
+
+     p._insertCharacters(token);
+ }
+
+ // Non-whitespace characters in body: reconstructs the active formatting elements,
+ // inserts the characters and clears `framesetOk`.
+ function characterInBody(p: Parser, token: CharacterToken): void {
+     p._reconstructActiveFormattingElements();
+
+     p._insertCharacters(token);
+
+     p.framesetOk = false;
+ }
+
+ // <html> start tag in body: unless a template is open, the token's attributes are
+ // adopted by the bottom-most element of the open-element stack.
+ function htmlStartTagInBody(p: Parser, token: TagToken): void {
+     if (p.openElements.tmplCount !== 0) {
+         return;
+     }
+
+     const rootElement = p.openElements.items[0];
+
+     p.treeAdapter.adoptAttributes(rootElement, token.attrs);
+ }
+
+ // <body> start tag in body: when a properly nested body element exists and no template
+ // is open, `framesetOk` is cleared and the body adopts the token's attributes.
+ function bodyStartTagInBody(p: Parser, token: TagToken): void {
+     const existingBody = p.openElements.tryPeekProperlyNestedBodyElement();
+
+     if (!existingBody || p.openElements.tmplCount !== 0) {
+         return;
+     }
+
+     p.framesetOk = false;
+     p.treeAdapter.adoptAttributes(existingBody, token.attrs);
+ }
+
+ // <frameset> start tag in body: only honoured while `framesetOk` is set and a properly
+ // nested body exists. The body is detached, the stack is popped up to the html element,
+ // the frameset is inserted and the mode switches to "in frameset".
+ function framesetStartTagInBody(p: Parser, token: TagToken): void {
+     const existingBody = p.openElements.tryPeekProperlyNestedBodyElement();
+
+     if (!p.framesetOk || !existingBody) {
+         return;
+     }
+
+     p.treeAdapter.detachNode(existingBody);
+     p.openElements.popAllUpToHtmlElement();
+     p._insertElement(token, NS.HTML);
+     p.insertionMode = InsertionMode.IN_FRAMESET;
+ }
+
+ // Start-tag handler that closes an open <p> in button scope (if any) and then inserts
+ // the element.
+ function addressStartTagInBody(p: Parser, token: TagToken): void {
+     const hasOpenParagraph = p.openElements.hasInButtonScope($.P);
+
+     if (hasOpenParagraph) {
+         p._closePElement();
+     }
+
+     p._insertElement(token, NS.HTML);
+ }
+
+ // Heading start tag in body: closes an open <p> in button scope, pops the current
+ // element if it is itself a numbered header, then inserts the new element.
+ function numberedHeaderStartTagInBody(p: Parser, token: TagToken): void {
+     const hasOpenParagraph = p.openElements.hasInButtonScope($.P);
+
+     if (hasOpenParagraph) {
+         p._closePElement();
+     }
+
+     // Checked after the <p> has been closed, so this sees the updated current element.
+     const currentIsHeader = isNumberedHeader(p.openElements.currentTagId);
+
+     if (currentIsHeader) {
+         p.openElements.pop();
+     }
+
+     p._insertElement(token, NS.HTML);
+ }
+
+ // Start-tag handler that closes an open <p> in button scope, inserts the element,
+ // arranges for an immediately following newline to be skipped and clears `framesetOk`.
+ function preStartTagInBody(p: Parser, token: TagToken): void {
+     const hasOpenParagraph = p.openElements.hasInButtonScope($.P);
+
+     if (hasOpenParagraph) {
+         p._closePElement();
+     }
+
+     p._insertElement(token, NS.HTML);
+
+     //NOTE: If the next token is a U+000A LINE FEED (LF) character token, it is ignored and parsing
+     //moves on to the next one. (Newlines at the start of pre blocks are ignored as an authoring convenience.)
+     p.skipNextNewLine = true;
+     p.framesetOk = false;
+ }
+
+ // <form> start tag in body. Outside of templates the tag is ignored while a form
+ // element is already recorded; otherwise an open <p> in button scope is closed and the
+ // form is inserted. The inserted form is recorded as `formElement` only outside templates.
+ function formStartTagInBody(p: Parser, token: TagToken): void {
+     const inTemplate = p.openElements.tmplCount > 0;
+
+     if (p.formElement && !inTemplate) {
+         return;
+     }
+
+     if (p.openElements.hasInButtonScope($.P)) {
+         p._closePElement();
+     }
+
+     p._insertElement(token, NS.HTML);
+
+     if (inTemplate) {
+         return;
+     }
+
+     p.formElement = p.openElements.current;
+ }
+
+ // List-item style start tag in body (<li>, or <dd>/<dt>).
+ //
+ // Clears `framesetOk`, then scans the open-element stack from the top: a matching open
+ // item (<li> for <li>; <dd> or <dt> for either of those) is closed, and the scan stops
+ // at the first special element other than <address>, <div> or <p>. Finally an open <p>
+ // in button scope is closed and the new element is inserted.
+ function listItemStartTagInBody(p: Parser, token: TagToken): void {
+     p.framesetOk = false;
+
+     const newId = token.tagID;
+     const startsLi = newId === $.LI;
+     const startsDdOrDt = newId === $.DD || newId === $.DT;
+
+     for (let i = p.openElements.stackTop; i >= 0; i--) {
+         const openId = p.openElements.tagIDs[i];
+         const closesLi = startsLi && openId === $.LI;
+         const closesDdOrDt = startsDdOrDt && (openId === $.DD || openId === $.DT);
+
+         if (closesLi || closesDdOrDt) {
+             p.openElements.generateImpliedEndTagsWithExclusion(openId);
+             p.openElements.popUntilTagNamePopped(openId);
+             break;
+         }
+
+         // <address>, <div> and <p> never stop the scan, even though they may be special.
+         const isExempt = openId === $.ADDRESS || openId === $.DIV || openId === $.P;
+
+         if (!isExempt && p._isSpecialElement(p.openElements.items[i], openId)) {
+             break;
+         }
+     }
+
+     if (p.openElements.hasInButtonScope($.P)) {
+         p._closePElement();
+     }
+
+     p._insertElement(token, NS.HTML);
+ }
+
+ // <plaintext> start tag in body: closes an open <p> in button scope, inserts the
+ // element and switches the tokenizer to the PLAINTEXT state.
+ function plaintextStartTagInBody(p: Parser, token: TagToken): void {
+     const hasOpenParagraph = p.openElements.hasInButtonScope($.P);
+
+     if (hasOpenParagraph) {
+         p._closePElement();
+     }
+
+     p._insertElement(token, NS.HTML);
+     p.tokenizer.state = TokenizerMode.PLAINTEXT;
+ }
+
+ // <button> start tag in body: a button already in scope is closed first (implied end
+ // tags, then pop until the button is popped). Afterwards the active formatting elements
+ // are reconstructed, the new button is inserted and `framesetOk` is cleared.
+ function buttonStartTagInBody(p: Parser, token: TagToken): void {
+     const buttonAlreadyOpen = p.openElements.hasInScope($.BUTTON);
+
+     if (buttonAlreadyOpen) {
+         p.openElements.generateImpliedEndTags();
+         p.openElements.popUntilTagNamePopped($.BUTTON);
+     }
+
+     p._reconstructActiveFormattingElements();
+     p._insertElement(token, NS.HTML);
+     p.framesetOk = false;
+ }
+
+ // <a> start tag in body: if an <a> entry is already among the active formatting
+ // elements (in scope), the adoption agency algorithm is run and that element/entry is
+ // removed from both lists. The new <a> is then inserted and registered as a formatting element.
+ function aStartTagInBody(p: Parser, token: TagToken): void {
+     const openAnchorEntry = p.activeFormattingElements.getElementEntryInScopeWithTagName(TN.A);
+
+     if (openAnchorEntry) {
+         callAdoptionAgency(p, token);
+         // The adoption agency run may leave the old anchor behind; remove it explicitly.
+         p.openElements.remove(openAnchorEntry.element);
+         p.activeFormattingElements.removeEntry(openAnchorEntry);
+     }
+
+     p._reconstructActiveFormattingElements();
+     p._insertElement(token, NS.HTML);
+
+     const insertedAnchor = p.openElements.current;
+     p.activeFormattingElements.pushElement(insertedAnchor, token);
+ }
+
+ // Formatting start tag in body: reconstructs the active formatting elements, inserts
+ // the element and pushes it onto the active formatting elements list.
+ function bStartTagInBody(p: Parser, token: TagToken): void {
+     p._reconstructActiveFormattingElements();
+     p._insertElement(token, NS.HTML);
+
+     const inserted = p.openElements.current;
+     p.activeFormattingElements.pushElement(inserted, token);
+ }
+
+ // <nobr> start tag in body: reconstructs the active formatting elements; if a <nobr>
+ // is already in scope, runs the adoption agency algorithm and reconstructs again. The
+ // new element is then inserted and registered as a formatting element.
+ function nobrStartTagInBody(p: Parser, token: TagToken): void {
+     p._reconstructActiveFormattingElements();
+
+     const nobrAlreadyOpen = p.openElements.hasInScope($.NOBR);
+
+     if (nobrAlreadyOpen) {
+         callAdoptionAgency(p, token);
+         p._reconstructActiveFormattingElements();
+     }
+
+     p._insertElement(token, NS.HTML);
+
+     const inserted = p.openElements.current;
+     p.activeFormattingElements.pushElement(inserted, token);
+ }
+
+ // Start-tag handler that reconstructs the active formatting elements, inserts the
+ // element, adds a marker to the active formatting elements and clears `framesetOk`.
+ function appletStartTagInBody(p: Parser, token: TagToken): void {
+     p._reconstructActiveFormattingElements();
+     p._insertElement(token, NS.HTML);
+
+     p.activeFormattingElements.insertMarker();
+
+     p.framesetOk = false;
+ }
+
+ // <table> start tag in body: unless the document is in quirks mode, an open <p> in
+ // button scope is closed first. The table is then inserted, `framesetOk` is cleared and
+ // the mode switches to "in table".
+ function tableStartTagInBody(p: Parser, token: TagToken): void {
+     const notQuirks = p.treeAdapter.getDocumentMode(p.document) !== DOCUMENT_MODE.QUIRKS;
+
+     if (notQuirks && p.openElements.hasInButtonScope($.P)) {
+         p._closePElement();
+     }
+
+     p._insertElement(token, NS.HTML);
+     p.framesetOk = false;
+     p.insertionMode = InsertionMode.IN_TABLE;
+ }
+
+ // Start-tag handler that reconstructs the active formatting elements, appends the
+ // element via `_appendElement`, clears `framesetOk` and acknowledges the self-closing flag.
+ function areaStartTagInBody(p: Parser, token: TagToken): void {
+     p._reconstructActiveFormattingElements();
+     p._appendElement(token, NS.HTML);
+
+     p.framesetOk = false;
+
+     token.ackSelfClosing = true;
+ }
+
+ // Returns true when the token has a `type` attribute whose value equals
+ // HIDDEN_INPUT_TYPE, compared case-insensitively.
+ function isHiddenInput(token: TagToken): boolean {
+     const inputType = getTokenAttr(token, ATTRS.TYPE);
+
+     if (inputType == null) {
+         return false;
+     }
+
+     return inputType.toLowerCase() === HIDDEN_INPUT_TYPE;
+ }
+
+ // <input> start tag in body: reconstructs the active formatting elements and appends
+ // the element. `framesetOk` is cleared unless the input is of the hidden type. The
+ // self-closing flag is acknowledged.
+ function inputStartTagInBody(p: Parser, token: TagToken): void {
+     p._reconstructActiveFormattingElements();
+     p._appendElement(token, NS.HTML);
+
+     const hidden = isHiddenInput(token);
+
+     if (!hidden) {
+         p.framesetOk = false;
+     }
+
+     token.ackSelfClosing = true;
+ }
+
+ // Start-tag handler that only appends the element via `_appendElement` and
+ // acknowledges the self-closing flag.
+ function paramStartTagInBody(p: Parser, token: TagToken): void {
+     p._appendElement(token, NS.HTML);
+
+     token.ackSelfClosing = true;
+ }
+
+ // <hr> start tag in body: closes an open <p> in button scope, appends the element,
+ // clears `framesetOk` and acknowledges the self-closing flag.
+ function hrStartTagInBody(p: Parser, token: TagToken): void {
+     const hasOpenParagraph = p.openElements.hasInButtonScope($.P);
+
+     if (hasOpenParagraph) {
+         p._closePElement();
+     }
+
+     p._appendElement(token, NS.HTML);
+     p.framesetOk = false;
+     token.ackSelfClosing = true;
+ }
+
+ // <image> start tag in body: the token is rewritten in place to an <img> token (both
+ // name and ID) and then handled by `areaStartTagInBody`.
+ function imageStartTagInBody(p: Parser, token: TagToken): void {
+     token.tagName = TN.IMG;
+     token.tagID = $.IMG;
+
+     areaStartTagInBody(p, token);
+ }
+
+ // <textarea> start tag in body: inserts the element, switches the tokenizer to RCDATA,
+ // remembers the current insertion mode as the original one and continues in "text" mode.
+ function textareaStartTagInBody(p: Parser, token: TagToken): void {
+     p._insertElement(token, NS.HTML);
+
+     //NOTE: If the next token is a U+000A LINE FEED (LF) character token, it is ignored and parsing
+     //moves on to the next one. (Newlines at the start of textarea elements are ignored as an authoring convenience.)
+     p.skipNextNewLine = true;
+
+     p.tokenizer.state = TokenizerMode.RCDATA;
+
+     // Save the mode to return to before switching to "text".
+     p.originalInsertionMode = p.insertionMode;
+     p.framesetOk = false;
+     p.insertionMode = InsertionMode.TEXT;
+ }
+
+ // <xmp> start tag in body: closes an open <p> in button scope, reconstructs the active
+ // formatting elements, clears `framesetOk` and switches to raw-text parsing.
+ function xmpStartTagInBody(p: Parser, token: TagToken): void {
+     const hasOpenParagraph = p.openElements.hasInButtonScope($.P);
+
+     if (hasOpenParagraph) {
+         p._closePElement();
+     }
+
+     p._reconstructActiveFormattingElements();
+     p.framesetOk = false;
+     p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
+ }
+
+ // <iframe> start tag in body: clears `framesetOk` and switches to raw-text parsing.
+ function iframeStartTagInBody(p: Parser, token: TagToken): void {
+     p.framesetOk = false;
+
+     p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
+ }
+
+ //NOTE: here we assume that we always act as a user agent with plugins enabled, so we parse
+ // the content as rawtext.
+ function noembedStartTagInBody(p: Parser, token: TagToken): void {
+     // The element's content is tokenized as raw text.
+     const mode = TokenizerMode.RAWTEXT;
+
+     p._switchToTextParsing(token, mode);
+ }
+
+ // <select> start tag in body: reconstructs the active formatting elements, inserts the
+ // element and clears `framesetOk`. The next mode is "in select in table" when the
+ // current mode is one of the table-related modes listed below, and "in select" otherwise.
+ function selectStartTagInBody(p: Parser, token: TagToken): void {
+     p._reconstructActiveFormattingElements();
+     p._insertElement(token, NS.HTML);
+     p.framesetOk = false;
+
+     switch (p.insertionMode) {
+         case InsertionMode.IN_TABLE:
+         case InsertionMode.IN_CAPTION:
+         case InsertionMode.IN_TABLE_BODY:
+         case InsertionMode.IN_ROW:
+         case InsertionMode.IN_CELL:
+             p.insertionMode = InsertionMode.IN_SELECT_IN_TABLE;
+             break;
+         default:
+             p.insertionMode = InsertionMode.IN_SELECT;
+     }
+ }
+
+ // Start-tag handler that pops the current element if it is an <option>, then
+ // reconstructs the active formatting elements and inserts the new element.
+ function optgroupStartTagInBody(p: Parser, token: TagToken): void {
+     const currentIsOption = p.openElements.currentTagId === $.OPTION;
+
+     if (currentIsOption) {
+         p.openElements.pop();
+     }
+
+     p._reconstructActiveFormattingElements();
+     p._insertElement(token, NS.HTML);
+ }
+
+ // Start-tag handler that generates implied end tags when a <ruby> is in scope, then
+ // inserts the element.
+ function rbStartTagInBody(p: Parser, token: TagToken): void {
+     const rubyInScope = p.openElements.hasInScope($.RUBY);
+
+     if (rubyInScope) {
+         p.openElements.generateImpliedEndTags();
+     }
+
+     p._insertElement(token, NS.HTML);
+ }
+
+ // Start-tag handler that, when a <ruby> is in scope, generates implied end tags with
+ // <rtc> excluded, then inserts the element.
+ function rtStartTagInBody(p: Parser, token: TagToken): void {
+     const rubyInScope = p.openElements.hasInScope($.RUBY);
+
+     if (rubyInScope) {
+         p.openElements.generateImpliedEndTagsWithExclusion($.RTC);
+     }
+
+     p._insertElement(token, NS.HTML);
+ }
+
+ // <math> start tag in body: reconstructs the active formatting elements, adjusts the
+ // token's MathML and XML attributes, and adds the element in the MathML namespace. A
+ // self-closing token is appended, any other token is inserted. The self-closing flag
+ // is acknowledged.
+ function mathStartTagInBody(p: Parser, token: TagToken): void {
+     p._reconstructActiveFormattingElements();
+
+     foreignContent.adjustTokenMathMLAttrs(token);
+     foreignContent.adjustTokenXMLAttrs(token);
+
+     if (!token.selfClosing) {
+         p._insertElement(token, NS.MATHML);
+     } else {
+         p._appendElement(token, NS.MATHML);
+     }
+
+     token.ackSelfClosing = true;
+ }
+
+ // <svg> start tag in body: reconstructs the active formatting elements, adjusts the
+ // token's SVG and XML attributes, and adds the element in the SVG namespace. A
+ // self-closing token is appended, any other token is inserted. The self-closing flag
+ // is acknowledged.
+ function svgStartTagInBody(p: Parser, token: TagToken): void {
+     p._reconstructActiveFormattingElements();
+
+     foreignContent.adjustTokenSVGAttrs(token);
+     foreignContent.adjustTokenXMLAttrs(token);
+
+     if (!token.selfClosing) {
+         p._insertElement(token, NS.SVG);
+     } else {
+         p._appendElement(token, NS.SVG);
+     }
+
+     token.ackSelfClosing = true;
+ }
+
+ // Fallback start-tag handler: reconstructs the active formatting elements and inserts
+ // the element in the HTML namespace.
+ function genericStartTagInBody(p: Parser, token: TagToken): void {
+     p._reconstructActiveFormattingElements();
+
+     p._insertElement(token, NS.HTML);
+ }
+
+ function startTagInBody(p: Parser, token: TagToken): void { // Dispatches a start tag to its "in body" handler, keyed on token.tagID.
+ switch (token.tagID) {
+ case $.I:
+ case $.S:
+ case $.B:
+ case $.U:
+ case $.EM:
+ case $.TT:
+ case $.BIG:
+ case $.CODE:
+ case $.FONT:
+ case $.SMALL:
+ case $.STRIKE:
+ case $.STRONG: { // handler inserts the element and pushes it onto activeFormattingElements
+ bStartTagInBody(p, token);
+ break;
+ }
+ case $.A: {
+ aStartTagInBody(p, token);
+ break;
+ }
+ case $.H1:
+ case $.H2:
+ case $.H3:
+ case $.H4:
+ case $.H5:
+ case $.H6: {
+ numberedHeaderStartTagInBody(p, token);
+ break;
+ }
+ case $.P:
+ case $.DL:
+ case $.OL:
+ case $.UL:
+ case $.DIV:
+ case $.DIR:
+ case $.NAV:
+ case $.MAIN:
+ case $.MENU:
+ case $.ASIDE:
+ case $.CENTER:
+ case $.FIGURE:
+ case $.FOOTER:
+ case $.HEADER:
+ case $.HGROUP:
+ case $.DIALOG:
+ case $.DETAILS:
+ case $.ADDRESS:
+ case $.ARTICLE:
+ case $.SECTION:
+ case $.SUMMARY:
+ case $.FIELDSET:
+ case $.BLOCKQUOTE:
+ case $.FIGCAPTION: { // handler closes an open <p> in button scope, then inserts the element
+ addressStartTagInBody(p, token);
+ break;
+ }
+ case $.LI:
+ case $.DD:
+ case $.DT: {
+ listItemStartTagInBody(p, token);
+ break;
+ }
+ case $.BR:
+ case $.IMG:
+ case $.WBR:
+ case $.AREA:
+ case $.EMBED:
+ case $.KEYGEN: { // handler uses _appendElement and acknowledges the self-closing flag
+ areaStartTagInBody(p, token);
+ break;
+ }
+ case $.HR: {
+ hrStartTagInBody(p, token);
+ break;
+ }
+ case $.RB:
+ case $.RTC: {
+ rbStartTagInBody(p, token);
+ break;
+ }
+ case $.RT:
+ case $.RP: {
+ rtStartTagInBody(p, token);
+ break;
+ }
+ case $.PRE:
+ case $.LISTING: {
+ preStartTagInBody(p, token);
+ break;
+ }
+ case $.XMP: {
+ xmpStartTagInBody(p, token);
+ break;
+ }
+ case $.SVG: {
+ svgStartTagInBody(p, token);
+ break;
+ }
+ case $.HTML: {
+ htmlStartTagInBody(p, token);
+ break;
+ }
+ case $.BASE:
+ case $.LINK:
+ case $.META:
+ case $.STYLE:
+ case $.TITLE:
+ case $.SCRIPT:
+ case $.BGSOUND:
+ case $.BASEFONT:
+ case $.TEMPLATE: { // these tags reuse the "in head" start-tag handling
+ startTagInHead(p, token);
+ break;
+ }
+ case $.BODY: {
+ bodyStartTagInBody(p, token);
+ break;
+ }
+ case $.FORM: {
+ formStartTagInBody(p, token);
+ break;
+ }
+ case $.NOBR: {
+ nobrStartTagInBody(p, token);
+ break;
+ }
+ case $.MATH: {
+ mathStartTagInBody(p, token);
+ break;
+ }
+ case $.TABLE: {
+ tableStartTagInBody(p, token);
+ break;
+ }
+ case $.INPUT: {
+ inputStartTagInBody(p, token);
+ break;
+ }
+ case $.PARAM:
+ case $.TRACK:
+ case $.SOURCE: {
+ paramStartTagInBody(p, token);
+ break;
+ }
+ case $.IMAGE: {
+ imageStartTagInBody(p, token);
+ break;
+ }
+ case $.BUTTON: {
+ buttonStartTagInBody(p, token);
+ break;
+ }
+ case $.APPLET:
+ case $.OBJECT:
+ case $.MARQUEE: { // handler inserts the element and adds a marker to activeFormattingElements
+ appletStartTagInBody(p, token);
+ break;
+ }
+ case $.IFRAME: {
+ iframeStartTagInBody(p, token);
+ break;
+ }
+ case $.SELECT: {
+ selectStartTagInBody(p, token);
+ break;
+ }
+ case $.OPTION:
+ case $.OPTGROUP: {
+ optgroupStartTagInBody(p, token);
+ break;
+ }
+ case $.NOEMBED: {
+ noembedStartTagInBody(p, token);
+ break;
+ }
+ case $.FRAMESET: {
+ framesetStartTagInBody(p, token);
+ break;
+ }
+ case $.TEXTAREA: {
+ textareaStartTagInBody(p, token);
+ break;
+ }
+ case $.NOSCRIPT: { // scripting enabled: same raw-text handler as <noembed>; otherwise a generic element
+ if (p.options.scriptingEnabled) {
+ noembedStartTagInBody(p, token);
+ } else {
+ genericStartTagInBody(p, token);
+ }
+ break;
+ }
+ case $.PLAINTEXT: {
+ plaintextStartTagInBody(p, token);
+ break;
+ }
+
+ case $.COL:
+ case $.TH:
+ case $.TD:
+ case $.TR:
+ case $.HEAD:
+ case $.FRAME:
+ case $.TBODY:
+ case $.TFOOT:
+ case $.THEAD:
+ case $.CAPTION:
+ case $.COLGROUP: {
+ // Ignore token: these start tags have no effect in the "in body" mode
+ break;
+ }
+ default: { // any other tag: reconstruct the active formatting elements and insert the element
+ genericStartTagInBody(p, token);
+ }
+ }
+ }
+
+ // </body> end tag in body: when a body element is in scope the mode switches to
+ // "after body" and, if source-code location info is enabled, the body's end location
+ // is recorded from this token.
+ function bodyEndTagInBody(p: Parser, token: TagToken): void {
+     if (!p.openElements.hasInScope($.BODY)) {
+         return;
+     }
+
+     p.insertionMode = InsertionMode.AFTER_BODY;
+
+     //NOTE: <body> is never popped from the stack, so its end location
+     //has to be updated explicitly.
+     if (!p.options.sourceCodeLocationInfo) {
+         return;
+     }
+
+     const bodyElement = p.openElements.tryPeekProperlyNestedBodyElement();
+
+     if (bodyElement) {
+         p._setEndLocation(bodyElement, token);
+     }
+ }
+
+ // </html> end tag in body: when a body element is in scope the mode switches to
+ // "after body" and the token is handled again by the "after body" end-tag handler.
+ function htmlEndTagInBody(p: Parser, token: TagToken): void {
+     if (!p.openElements.hasInScope($.BODY)) {
+         return;
+     }
+
+     p.insertionMode = InsertionMode.AFTER_BODY;
+     endTagAfterBody(p, token);
+ }
+
+ // End-tag handler: when an element with the token's tag ID is in scope, implied end
+ // tags are generated and the stack is popped until that element has been popped.
+ // Otherwise the token is ignored.
+ function addressEndTagInBody(p: Parser, token: TagToken): void {
+     const id = token.tagID;
+
+     if (!p.openElements.hasInScope(id)) {
+         return;
+     }
+
+     p.openElements.generateImpliedEndTags();
+     p.openElements.popUntilTagNamePopped(id);
+ }
+
+ // </form> end tag in body.
+ //
+ // Outside of templates the recorded form element is always reset to null. The tag is
+ // then acted upon only if there was a recorded form (or a template is open) and a
+ // <form> is in scope: implied end tags are generated, and either the stack is popped
+ // until the form is popped (inside templates) or the recorded form element is removed
+ // from the stack (outside templates).
+ function formEndTagInBody(p: Parser): void {
+     const inTemplate = p.openElements.tmplCount > 0;
+     const recordedForm = p.formElement;
+
+     if (!inTemplate) {
+         p.formElement = null;
+     }
+
+     if (!recordedForm && !inTemplate) {
+         return;
+     }
+
+     if (!p.openElements.hasInScope($.FORM)) {
+         return;
+     }
+
+     p.openElements.generateImpliedEndTags();
+
+     if (inTemplate) {
+         p.openElements.popUntilTagNamePopped($.FORM);
+     } else if (recordedForm) {
+         p.openElements.remove(recordedForm);
+     }
+ }
+
+ // </p> end tag in body: if no <p> is in button scope, a fake <p> is inserted first so
+ // that there is always a paragraph to close.
+ function pEndTagInBody(p: Parser): void {
+     const hasOpenParagraph = p.openElements.hasInButtonScope($.P);
+
+     if (!hasOpenParagraph) {
+         p._insertFakeElement(TN.P, $.P);
+     }
+
+     p._closePElement();
+ }
+
+ // </li> end tag in body: when an <li> is in list-item scope, implied end tags (other
+ // than for <li>) are generated and the stack is popped until the <li> has been popped.
+ function liEndTagInBody(p: Parser): void {
+     if (!p.openElements.hasInListItemScope($.LI)) {
+         return;
+     }
+
+     p.openElements.generateImpliedEndTagsWithExclusion($.LI);
+     p.openElements.popUntilTagNamePopped($.LI);
+ }
+
+ // End-tag handler: when an element with the token's tag ID is in scope, implied end
+ // tags (other than for that tag) are generated and the stack is popped until that
+ // element has been popped.
+ function ddEndTagInBody(p: Parser, token: TagToken): void {
+     const id = token.tagID;
+
+     if (!p.openElements.hasInScope(id)) {
+         return;
+     }
+
+     p.openElements.generateImpliedEndTagsWithExclusion(id);
+     p.openElements.popUntilTagNamePopped(id);
+ }
+
+ // Heading end tag in body: when any numbered header is in scope, implied end tags are
+ // generated and the stack is popped until a numbered header has been popped.
+ function numberedHeaderEndTagInBody(p: Parser): void {
+     if (!p.openElements.hasNumberedHeaderInScope()) {
+         return;
+     }
+
+     p.openElements.generateImpliedEndTags();
+     p.openElements.popUntilNumberedHeaderPopped();
+ }
+
+ // End-tag handler: when an element with the token's tag ID is in scope, implied end
+ // tags are generated, the stack is popped until that element has been popped, and the
+ // active formatting elements are cleared back to the last marker.
+ function appletEndTagInBody(p: Parser, token: TagToken): void {
+     const id = token.tagID;
+
+     if (!p.openElements.hasInScope(id)) {
+         return;
+     }
+
+     p.openElements.generateImpliedEndTags();
+     p.openElements.popUntilTagNamePopped(id);
+     p.activeFormattingElements.clearToLastMarker();
+ }
+
+/**
+ * Handler that `endTagInBody` dispatches to for `$.BR` end tags.
+ *
+ * The end tag is not matched against the stack of open elements. Instead the
+ * function reconstructs the active formatting elements, calls
+ * `_insertFakeElement(TN.BR, $.BR)`, pops the stack of open elements once,
+ * and sets `p.framesetOk` to `false`.
+ *
+ * @param p Parser whose state is updated.
+ */
+function brEndTagInBody(p: Parser): void {
+ p._reconstructActiveFormattingElements();
+ p._insertFakeElement(TN.BR, $.BR);
+ // NOTE(review): presumably this pop removes the element pushed by the line above; confirm against `_insertFakeElement`.
+ p.openElements.pop();
+ p.framesetOk = false;
+}
+
+/**
+ * Fallback handler used by `endTagInBody` for every tag ID without a
+ * dedicated case.
+ *
+ * Walks the stack of open elements from `stackTop` down to index 1 (index 0
+ * is never examined) and stops at the first entry that either:
+ *  - matches the token: same tag ID and, when that ID is `$.UNKNOWN`, the same
+ *    tag name as reported by the tree adapter. Implied end tags are then
+ *    generated with the token's tag ID excluded and, if the stack still
+ *    reaches that index, it is shortened to it;
+ *  - or is reported as special by `_isSpecialElement`, in which case the
+ *    token is dropped without touching the stack.
+ *
+ * @param p Parser whose stack of open elements is updated.
+ * @param token The end tag token being processed.
+ */
+function genericEndTagInBody(p: Parser, token: TagToken): void {
+    const { tagName, tagID } = token;
+
+    for (let index = p.openElements.stackTop; index > 0; index--) {
+        const candidate = p.openElements.items[index];
+        const candidateId = p.openElements.tagIDs[index];
+
+        // IDs alone cannot tell unknown tags apart, so those are also compared by name.
+        const isMatch =
+            tagID === candidateId && (tagID !== $.UNKNOWN || p.treeAdapter.getTagName(candidate) === tagName);
+
+        if (isMatch) {
+            p.openElements.generateImpliedEndTagsWithExclusion(tagID);
+
+            // Generating implied end tags may already have shrunk the stack below `index`.
+            if (p.openElements.stackTop >= index) {
+                p.openElements.shortenToLength(index);
+            }
+
+            return;
+        }
+
+        if (p._isSpecialElement(candidate, candidateId)) {
+            return;
+        }
+    }
+}
+
+/**
+ * Dispatches an end tag token to a dedicated handler based on `token.tagID`.
+ *
+ * Each group of case labels shares one handler. Handlers that need the tag ID
+ * receive the token; the others receive only the parser. Tag IDs without an
+ * explicit case fall through to `genericEndTagInBody`.
+ *
+ * @param p Parser whose state the selected handler updates.
+ * @param token The end tag token being processed.
+ */
+function endTagInBody(p: Parser, token: TagToken): void {
+ switch (token.tagID) {
+ case $.A:
+ case $.B:
+ case $.I:
+ case $.S:
+ case $.U:
+ case $.EM:
+ case $.TT:
+ case $.BIG:
+ case $.CODE:
+ case $.FONT:
+ case $.NOBR:
+ case $.SMALL:
+ case $.STRIKE:
+ case $.STRONG: {
+ callAdoptionAgency(p, token);
+ break;
+ }
+ case $.P: {
+ pEndTagInBody(p);
+ break;
+ }
+ case $.DL:
+ case $.UL:
+ case $.OL:
+ case $.DIR:
+ case $.DIV:
+ case $.NAV:
+ case $.PRE:
+ case $.MAIN:
+ case $.MENU:
+ case $.ASIDE:
+ case $.CENTER:
+ case $.FIGURE:
+ case $.FOOTER:
+ case $.HEADER:
+ case $.HGROUP:
+ case $.DIALOG:
+ case $.ADDRESS:
+ case $.ARTICLE:
+ case $.DETAILS:
+ case $.SECTION:
+ case $.SUMMARY:
+ case $.LISTING:
+ case $.FIELDSET:
+ case $.BLOCKQUOTE:
+ case $.FIGCAPTION: {
+ addressEndTagInBody(p, token);
+ break;
+ }
+ case $.LI: {
+ liEndTagInBody(p);
+ break;
+ }
+ case $.DD:
+ case $.DT: {
+ ddEndTagInBody(p, token);
+ break;
+ }
+ case $.H1:
+ case $.H2:
+ case $.H3:
+ case $.H4:
+ case $.H5:
+ case $.H6: {
+ numberedHeaderEndTagInBody(p);
+ break;
+ }
+ case $.BR: {
+ brEndTagInBody(p);
+ break;
+ }
+ case $.BODY: {
+ bodyEndTagInBody(p, token);
+ break;
+ }
+ case $.HTML: {
+ htmlEndTagInBody(p, token);
+ break;
+ }
+ case $.FORM: {
+ formEndTagInBody(p);
+ break;
+ }
+ case $.APPLET:
+ case $.OBJECT:
+ case $.MARQUEE: {
+ appletEndTagInBody(p, token);
+ break;
+ }
+ case $.TEMPLATE: {
+ // No body-specific handler: the token is passed to `endTagInHead`.
+ endTagInHead(p, token);
+ break;
+ }
+ default: {
+ // Every other tag ID, including unknown tags, takes the generic path.
+ genericEndTagInBody(p, token);
+ }
+ }
+}
+
+/**
+ * Handles an EOF token for the handlers in this group.
+ *
+ * When `p.tmplInsertionModeStack` is non-empty the token is passed to
+ * `eofInTemplate`; otherwise `stopParsing` is called.
+ *
+ * @param p Parser receiving the EOF.
+ * @param token The EOF token.
+ */
+function eofInBody(p: Parser, token: EOFToken): void {
+    const hasTemplateModes = p.tmplInsertionModeStack.length > 0;
+
+    if (!hasTemplateModes) {
+        stopParsing(p, token);
+        return;
+    }
+
+    eofInTemplate(p, token);
+}
+
+// The "text" insertion mode
+//------------------------------------------------------------------
+/**
+ * Handles an end tag in the "text" insertion mode.
+ *
+ * For `$.SCRIPT` end tags the optional `p.scriptHandler` is called with the
+ * current open element. For every end tag, the stack of open elements is
+ * then popped once and the insertion mode is restored from
+ * `p.originalInsertionMode`.
+ *
+ * @param p Parser whose state is updated.
+ * @param token The end tag token being processed.
+ */
+function endTagInText(p: Parser, token: TagToken): void {
+    const closesScript = token.tagID === $.SCRIPT;
+
+    if (closesScript) {
+        // The handler may be absent; it runs before the element is popped.
+        p.scriptHandler?.(p.openElements.current);
+    }
+
+    p.openElements.pop();
+    p.insertionMode = p.originalInsertionMode;
+}
+
+/**
+ * Handles an EOF token in the "text" insertion mode.
+ *
+ * Reports `ERR.eofInElementThatCanContainOnlyText`, pops the stack of open
+ * elements once, restores the insertion mode from `p.originalInsertionMode`,
+ * and then passes the same token to `p.onEof`.
+ *
+ * @param p Parser whose state is updated.
+ * @param token The EOF token.
+ */
+function eofInText(p: Parser, token: EOFToken): void {
+ p._err(token, ERR.eofInElementThatCanContainOnlyText);
+ p.openElements.pop();
+ p.insertionMode = p.originalInsertionMode;
+ // The insertion mode was restored on the line above, before the token is handed back to the parser.
+ p.onEof(token);
+}
+
+// The "in table" insertion mode
+//------------------------------------------------------------------
+/**
+ * Handles a character token in the "in table" insertion mode.
+ *
+ * If the current element's tag ID is not in `TABLE_STRUCTURE_TAGS`, the token
+ * is passed to `tokenInTable`. Otherwise the pending character token buffer
+ * and its non-whitespace flag are reset, the current insertion mode is saved
+ * in `p.originalInsertionMode`, the parser switches to
+ * `InsertionMode.IN_TABLE_TEXT`, and the token is forwarded by type:
+ *  - `TokenType.CHARACTER` to `characterInTableText`;
+ *  - `TokenType.WHITESPACE_CHARACTER` to `whitespaceCharacterInTableText`;
+ *  - any other type (null characters) is ignored.
+ *
+ * @param p Parser whose state is updated.
+ * @param token The character token being processed.
+ */
+function characterInTable(p: Parser, token: CharacterToken): void {
+    if (!TABLE_STRUCTURE_TAGS.has(p.openElements.currentTagId)) {
+        tokenInTable(p, token);
+        return;
+    }
+
+    p.pendingCharacterTokens.length = 0;
+    p.hasNonWhitespacePendingCharacterToken = false;
+    p.originalInsertionMode = p.insertionMode;
+    p.insertionMode = InsertionMode.IN_TABLE_TEXT;
+
+    if (token.type === TokenType.CHARACTER) {
+        characterInTableText(p, token);
+    } else if (token.type === TokenType.WHITESPACE_CHARACTER) {
+        whitespaceCharacterInTableText(p, token);
+    }
+    // Ignore null
+}
+
+/**
+ * Start tag handler belonging to the "in table" insertion mode group.
+ * NOTE(review): the dispatching caller is outside this chunk; the name
+ * suggests it serves `caption` start tags. Confirm.
+ *
+ * Clears the stack of open elements back to a table context, inserts a marker
+ * into the list of active formatting elements, inserts an element for the
+ * token in the `NS.HTML` namespace, and switches to `InsertionMode.IN_CAPTION`.
+ *
+ * @param p Parser whose state is updated.
+ * @param token The start tag token being processed.
+ */
+function captionStartTagInTable(p: Parser, token: TagToken): void {
+ p.openElements.clearBackToTableContext();
+ p.activeFormattingElements.insertMarker();
+ p._insertElement(token, NS.HTML);
+ p.insertionMode = InsertionMode.IN_CAPTION;
+}
+
+/**
+ * Start tag handler belonging to the "in table" insertion mode group.
+ * NOTE(review): the dispatching caller is outside this chunk; the name
+ * suggests it serves `colgroup` start tags. Confirm.
+ *
+ * Clears the stack of open elements back to a table context, inserts an
+ * element for the token in the `NS.HTML` namespace, and switches to
+ * `InsertionMode.IN_COLUMN_GROUP`.
+ *
+ * @param p Parser whose state is updated.
+ * @param token The start tag token being processed.
+ */
+function colgroupStartTagInTable(p: Parser, token: TagToken): void {
+ p.openElements.clearBackToTableContext();
+ p._insertElement(token, NS.HTML);
+ p.insertionMode = InsertionMode.IN_COLUMN_GROUP;
+}
+
+/**
+ * Start tag handler belonging to the "in table" insertion mode group.
+ * NOTE(review): the dispatching caller is outside this chunk; the name
+ * suggests it serves `col` start tags. Confirm.
+ *
+ * No element is inserted for the token here. The function clears the stack
+ * back to a table context, inserts a fake `colgroup` element
+ * (`TN.COLGROUP` / `$.COLGROUP`), switches to
+ * `InsertionMode.IN_COLUMN_GROUP`, and passes the token to
+ * `startTagInColumnGroup`.
+ *
+ * @param p Parser whose state is updated.
+ * @param token The start tag token being processed.
+ */
+function colStartTagInTable(p: Parser, token: TagToken): void {
+ p.openElements.clearBackToTableContext();
+ p._insertFakeElement(TN.COLGROUP, $.COLGROUP);
+ p.insertionMode = InsertionMode.IN_COLUMN_GROUP;
+ // The original token is handled by the column group handler.
+ startTagInColumnGroup(p, token);
+}
+
+/**
+ * Start tag handler belonging to the "in table" insertion mode group.
+ * NOTE(review): the dispatching caller is outside this chunk, so the exact
+ * set of tags routed here is not visible. Confirm.
+ *
+ * Clears the stack of open elements back to a table context, inserts an
+ * element for the token in the `NS.HTML` namespace, and switches to
+ * `InsertionMode.IN_TABLE_BODY`.
+ *
+ * @param p Parser whose state is updated.
+ * @param token The start tag token being processed.
+ */
+function tbodyStartTagInTable(p: Parser, token: TagToken): void {
+ p.openElements.clearBackToTableContext();
+ p._insertElement(token, NS.HTML);
+ p.insertionMode = InsertionMode.IN_TABLE_BODY;
+}
+
+/**
+ * Start tag handler belonging to the "in table" insertion mode group.
+ * NOTE(review): the dispatching caller is outside this chunk, so the exact
+ * set of tags routed here is not visible. Confirm.
+ *
+ * No element is inserted for the token here. The function clears the stack
+ * back to a table context, inserts a fake `tbody` element
+ * (`TN.TBODY` / `$.TBODY`), switches to `InsertionMode.IN_TABLE_BODY`, and
+ * passes the token to `startTagInTableBody`.
+ *
+ * @param p Parser whose state is updated.
+ * @param token The start tag token being processed.
+ */
+function tdStartTagInTable(p: Parser, token: TagToken): void {
+ p.openElements.clearBackToTableContext();
+ p._insertFakeElement(TN.TBODY, $.TBODY);
+ p.insertionMode = InsertionMode.IN_TABLE_BODY;
+ // The original token is handled by the table body handler.
+ startTagInTableBody(p, token);
+}
+
+/**
+ * Start tag handler belonging to the "in table" insertion mode group.
+ *
+ * The token is dropped unless `$.TABLE` is in table scope. Otherwise the
+ * stack is popped via `popUntilTagNamePopped($.TABLE)`, the insertion mode is
+ * reset, and the same token is processed again through `_processStartTag`.
+ *
+ * @param p Parser whose state is updated.
+ * @param token The start tag token being processed.
+ */
+function tableStartTagInTable(p: Parser, token: TagToken): void {
+    if (!p.openElements.hasInTableScope($.TABLE)) {
+        return;
+    }
+
+    p.openElements.popUntilTagNamePopped($.TABLE);
+    p._resetInsertionMode();
+    p._processStartTag(token);
+}
+
+function inputStartTagInTable