fix

2025-05-12 05:38:44 +09:00
parent dced21c3f8
commit 6d78bfa46e
8120 changed files with 1161564 additions and 0 deletions
--- a/book/node_modules/parse5/dist/cjs/tokenizer/index.d.ts
+++ b/book/node_modules/parse5/dist/cjs/tokenizer/index.d.ts
@@ -0,0 +1,247 @@
+import { Preprocessor } from './preprocessor.js';
+import { type Token, type CharacterToken, type DoctypeToken, type TagToken, type EOFToken, type CommentToken, type Attribute, type Location } from '../common/token.js';
+import { EntityDecoder } from 'entities/lib/decode.js';
+import { ERR, type ParserErrorHandler } from '../common/error-codes.js';
+declare const enum State { // Numeric ids of the tokenizer states; not exported itself, but used by TokenizerMode and by Tokenizer.state / returnState
+    DATA = 0,
+    RCDATA = 1,
+    RAWTEXT = 2,
+    SCRIPT_DATA = 3,
+    PLAINTEXT = 4,
+    TAG_OPEN = 5,
+    END_TAG_OPEN = 6,
+    TAG_NAME = 7,
+    RCDATA_LESS_THAN_SIGN = 8,
+    RCDATA_END_TAG_OPEN = 9,
+    RCDATA_END_TAG_NAME = 10,
+    RAWTEXT_LESS_THAN_SIGN = 11,
+    RAWTEXT_END_TAG_OPEN = 12,
+    RAWTEXT_END_TAG_NAME = 13,
+    SCRIPT_DATA_LESS_THAN_SIGN = 14,
+    SCRIPT_DATA_END_TAG_OPEN = 15,
+    SCRIPT_DATA_END_TAG_NAME = 16,
+    SCRIPT_DATA_ESCAPE_START = 17,
+    SCRIPT_DATA_ESCAPE_START_DASH = 18,
+    SCRIPT_DATA_ESCAPED = 19,
+    SCRIPT_DATA_ESCAPED_DASH = 20,
+    SCRIPT_DATA_ESCAPED_DASH_DASH = 21,
+    SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 22,
+    SCRIPT_DATA_ESCAPED_END_TAG_OPEN = 23,
+    SCRIPT_DATA_ESCAPED_END_TAG_NAME = 24,
+    SCRIPT_DATA_DOUBLE_ESCAPE_START = 25,
+    SCRIPT_DATA_DOUBLE_ESCAPED = 26,
+    SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 27,
+    SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 28,
+    SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 29,
+    SCRIPT_DATA_DOUBLE_ESCAPE_END = 30,
+    BEFORE_ATTRIBUTE_NAME = 31,
+    ATTRIBUTE_NAME = 32,
+    AFTER_ATTRIBUTE_NAME = 33,
+    BEFORE_ATTRIBUTE_VALUE = 34,
+    ATTRIBUTE_VALUE_DOUBLE_QUOTED = 35,
+    ATTRIBUTE_VALUE_SINGLE_QUOTED = 36,
+    ATTRIBUTE_VALUE_UNQUOTED = 37,
+    AFTER_ATTRIBUTE_VALUE_QUOTED = 38,
+    SELF_CLOSING_START_TAG = 39,
+    BOGUS_COMMENT = 40,
+    MARKUP_DECLARATION_OPEN = 41,
+    COMMENT_START = 42,
+    COMMENT_START_DASH = 43,
+    COMMENT = 44,
+    COMMENT_LESS_THAN_SIGN = 45,
+    COMMENT_LESS_THAN_SIGN_BANG = 46,
+    COMMENT_LESS_THAN_SIGN_BANG_DASH = 47,
+    COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH = 48,
+    COMMENT_END_DASH = 49,
+    COMMENT_END = 50,
+    COMMENT_END_BANG = 51,
+    DOCTYPE = 52,
+    BEFORE_DOCTYPE_NAME = 53,
+    DOCTYPE_NAME = 54,
+    AFTER_DOCTYPE_NAME = 55,
+    AFTER_DOCTYPE_PUBLIC_KEYWORD = 56,
+    BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 57,
+    DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 58,
+    DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 59,
+    AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 60,
+    BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 61,
+    AFTER_DOCTYPE_SYSTEM_KEYWORD = 62,
+    BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 63,
+    DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 64,
+    DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 65,
+    AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 66,
+    BOGUS_DOCTYPE = 67,
+    CDATA_SECTION = 68,
+    CDATA_SECTION_BRACKET = 69,
+    CDATA_SECTION_END = 70,
+    CHARACTER_REFERENCE = 71,
+    AMBIGUOUS_AMPERSAND = 72
+}
+export declare const TokenizerMode: { // Exported read-only aliases for six of the State members (State itself is not exported)
+    readonly DATA: State.DATA;
+    readonly RCDATA: State.RCDATA;
+    readonly RAWTEXT: State.RAWTEXT;
+    readonly SCRIPT_DATA: State.SCRIPT_DATA;
+    readonly PLAINTEXT: State.PLAINTEXT;
+    readonly CDATA_SECTION: State.CDATA_SECTION;
+};
+export interface TokenizerOptions { // Options object taken by the Tokenizer constructor
+    sourceCodeLocationInfo?: boolean; // NOTE(review): presumably switches on Location tracking for tokens — confirm in index.js
+}
+export interface TokenHandler { // Callback surface the Tokenizer keeps as `handler`; one method per token category
+    onComment(token: CommentToken): void;
+    onDoctype(token: DoctypeToken): void;
+    onStartTag(token: TagToken): void;
+    onEndTag(token: TagToken): void;
+    onEof(token: EOFToken): void;
+    onCharacter(token: CharacterToken): void;
+    onNullCharacter(token: CharacterToken): void; // same token type as onCharacter
+    onWhitespaceCharacter(token: CharacterToken): void; // same token type as onCharacter
+    onParseError?: ParserErrorHandler | null; // the only optional member; may be omitted or explicitly null
+}
+export declare class Tokenizer { // Declaration-only surface; the method bodies live in the compiled ./index.js
+    protected options: TokenizerOptions;
+    protected handler: TokenHandler;
+    preprocessor: Preprocessor; // input buffer/cursor declared in ./preprocessor.js
+    protected paused: boolean;
+    /** Ensures that the parsing loop isn't run multiple times at once. */
+    protected inLoop: boolean;
+    /**
+     * Indicates that the current adjusted node exists, is not an element in the HTML namespace,
+     * and that it is not an integration point for either MathML or HTML.
+     *
+     * @see {@link https://html.spec.whatwg.org/multipage/parsing.html#tree-construction}
+     */
+    inForeignNode: boolean;
+    lastStartTagName: string;
+    active: boolean;
+    state: State;
+    protected returnState: State;
+    /**
+     * We use `entities`' `EntityDecoder` to parse character references.
+     *
+     * All of the following states are handled by the `EntityDecoder`:
+     *
+     * - Named character reference state
+     * - Numeric character reference state
+     * - Hexadecimal character reference start state
+     * - Hexadecimal character reference state
+     * - Decimal character reference state
+     * - Numeric character reference end state
+     */
+    protected entityDecoder: EntityDecoder;
+    protected entityStartPos: number;
+    protected consumedAfterSnapshot: number;
+    protected currentLocation: Location | null;
+    protected currentCharacterToken: CharacterToken | null;
+    protected currentToken: Token | null;
+    protected currentAttr: Attribute;
+    constructor(options: TokenizerOptions, handler: TokenHandler);
+    protected _err(code: ERR, cpOffset?: number): void;
+    protected getCurrentLocation(offset: number): Location | null;
+    protected _runParsingLoop(): void;
+    pause(): void;
+    resume(writeCallback?: () => void): void;
+    write(chunk: string, isLastChunk: boolean, writeCallback?: () => void): void;
+    insertHtmlAtCurrentPos(chunk: string): void;
+    protected _ensureHibernation(): boolean;
+    protected _consume(): number;
+    protected _advanceBy(count: number): void;
+    protected _consumeSequenceIfMatch(pattern: string, caseSensitive: boolean): boolean;
+    protected _createStartTagToken(): void;
+    protected _createEndTagToken(): void;
+    protected _createCommentToken(offset: number): void;
+    protected _createDoctypeToken(initialName: string | null): void;
+    protected _createCharacterToken(type: CharacterToken['type'], chars: string): void;
+    protected _createAttr(attrNameFirstCh: string): void;
+    protected _leaveAttrName(): void;
+    protected _leaveAttrValue(): void;
+    protected prepareToken(ct: Token): void;
+    protected emitCurrentTagToken(): void;
+    protected emitCurrentComment(ct: CommentToken): void;
+    protected emitCurrentDoctype(ct: DoctypeToken): void;
+    protected _emitCurrentCharacterToken(nextLocation: Location | null): void;
+    protected _emitEOFToken(): void;
+    protected _appendCharToCurrentCharacterToken(type: CharacterToken['type'], ch: string): void;
+    protected _emitCodePoint(cp: number): void;
+    protected _emitChars(ch: string): void;
+    protected _startCharacterReference(): void;
+    protected _isCharacterReferenceInAttribute(): boolean;
+    protected _flushCodePointConsumedAsCharacterReference(cp: number): void;
+    protected _callState(cp: number): void;
+    protected _stateData(cp: number): void;
+    protected _stateRcdata(cp: number): void;
+    protected _stateRawtext(cp: number): void;
+    protected _stateScriptData(cp: number): void;
+    protected _statePlaintext(cp: number): void;
+    protected _stateTagOpen(cp: number): void;
+    protected _stateEndTagOpen(cp: number): void;
+    protected _stateTagName(cp: number): void;
+    protected _stateRcdataLessThanSign(cp: number): void;
+    protected _stateRcdataEndTagOpen(cp: number): void;
+    protected handleSpecialEndTag(_cp: number): boolean;
+    protected _stateRcdataEndTagName(cp: number): void;
+    protected _stateRawtextLessThanSign(cp: number): void;
+    protected _stateRawtextEndTagOpen(cp: number): void;
+    protected _stateRawtextEndTagName(cp: number): void;
+    protected _stateScriptDataLessThanSign(cp: number): void;
+    protected _stateScriptDataEndTagOpen(cp: number): void;
+    protected _stateScriptDataEndTagName(cp: number): void;
+    protected _stateScriptDataEscapeStart(cp: number): void;
+    protected _stateScriptDataEscapeStartDash(cp: number): void;
+    protected _stateScriptDataEscaped(cp: number): void;
+    protected _stateScriptDataEscapedDash(cp: number): void;
+    protected _stateScriptDataEscapedDashDash(cp: number): void;
+    protected _stateScriptDataEscapedLessThanSign(cp: number): void;
+    protected _stateScriptDataEscapedEndTagOpen(cp: number): void;
+    protected _stateScriptDataEscapedEndTagName(cp: number): void;
+    protected _stateScriptDataDoubleEscapeStart(cp: number): void;
+    protected _stateScriptDataDoubleEscaped(cp: number): void;
+    protected _stateScriptDataDoubleEscapedDash(cp: number): void;
+    protected _stateScriptDataDoubleEscapedDashDash(cp: number): void;
+    protected _stateScriptDataDoubleEscapedLessThanSign(cp: number): void;
+    protected _stateScriptDataDoubleEscapeEnd(cp: number): void;
+    protected _stateBeforeAttributeName(cp: number): void;
+    protected _stateAttributeName(cp: number): void;
+    protected _stateAfterAttributeName(cp: number): void;
+    protected _stateBeforeAttributeValue(cp: number): void;
+    protected _stateAttributeValueDoubleQuoted(cp: number): void;
+    protected _stateAttributeValueSingleQuoted(cp: number): void;
+    protected _stateAttributeValueUnquoted(cp: number): void;
+    protected _stateAfterAttributeValueQuoted(cp: number): void;
+    protected _stateSelfClosingStartTag(cp: number): void;
+    protected _stateBogusComment(cp: number): void;
+    protected _stateMarkupDeclarationOpen(cp: number): void;
+    protected _stateCommentStart(cp: number): void;
+    protected _stateCommentStartDash(cp: number): void;
+    protected _stateComment(cp: number): void;
+    protected _stateCommentLessThanSign(cp: number): void;
+    protected _stateCommentLessThanSignBang(cp: number): void;
+    protected _stateCommentLessThanSignBangDash(cp: number): void;
+    protected _stateCommentLessThanSignBangDashDash(cp: number): void;
+    protected _stateCommentEndDash(cp: number): void;
+    protected _stateCommentEnd(cp: number): void;
+    protected _stateCommentEndBang(cp: number): void;
+    protected _stateDoctype(cp: number): void;
+    protected _stateBeforeDoctypeName(cp: number): void;
+    protected _stateDoctypeName(cp: number): void;
+    protected _stateAfterDoctypeName(cp: number): void;
+    protected _stateAfterDoctypePublicKeyword(cp: number): void;
+    protected _stateBeforeDoctypePublicIdentifier(cp: number): void;
+    protected _stateDoctypePublicIdentifierDoubleQuoted(cp: number): void;
+    protected _stateDoctypePublicIdentifierSingleQuoted(cp: number): void;
+    protected _stateAfterDoctypePublicIdentifier(cp: number): void;
+    protected _stateBetweenDoctypePublicAndSystemIdentifiers(cp: number): void;
+    protected _stateAfterDoctypeSystemKeyword(cp: number): void;
+    protected _stateBeforeDoctypeSystemIdentifier(cp: number): void;
+    protected _stateDoctypeSystemIdentifierDoubleQuoted(cp: number): void;
+    protected _stateDoctypeSystemIdentifierSingleQuoted(cp: number): void;
+    protected _stateAfterDoctypeSystemIdentifier(cp: number): void;
+    protected _stateBogusDoctype(cp: number): void;
+    protected _stateCdataSection(cp: number): void;
+    protected _stateCdataSectionBracket(cp: number): void;
+    protected _stateCdataSectionEnd(cp: number): void;
+    protected _stateCharacterReference(): void; // unlike the other _state* methods, declared without a code point parameter
+    protected _stateAmbiguousAmpersand(cp: number): void;
+}
+export {};
--- a/book/node_modules/parse5/dist/cjs/tokenizer/index.js
+++ b/book/node_modules/parse5/dist/cjs/tokenizer/index.js
--- a/book/node_modules/parse5/dist/cjs/tokenizer/preprocessor.d.ts
+++ b/book/node_modules/parse5/dist/cjs/tokenizer/preprocessor.d.ts
@@ -0,0 +1,36 @@
+import { ERR, type ParserError, type ParserErrorHandler } from '../common/error-codes.js';
+export declare class Preprocessor { // Typings for ./preprocessor.js; private members are emitted without types
+    private handler;
+    html: string; // buffered input text that has not been dropped yet
+    pos: number; // index into html of the code unit last read; -1 before the first advance()
+    private lastGapPos;
+    private gapStack;
+    private skipNextNewLine;
+    lastChunkWritten: boolean; // isLastChunk flag of the most recent write()
+    endOfChunkHit: boolean; // set when a read ran past html while more chunks may still arrive
+    bufferWaterline: number; // dropParsedChunk() trims only once pos exceeds this
+    private isEol;
+    private lineStartPos;
+    droppedBufferSize: number; // number of code units already cut from the front of html
+    line: number; // starts at 1
+    constructor(handler: {
+        onParseError?: ParserErrorHandler | null;
+    });
+    /** The column on the current line. If we just saw a gap (eg. a surrogate pair), return the index before. */
+    get col(): number;
+    get offset(): number; // droppedBufferSize + pos
+    getError(code: ERR, cpOffset: number): ParserError; // zero-length error record on the current line, shifted by cpOffset
+    private lastErrOffset;
+    private _err;
+    private _addGap;
+    private _processSurrogate;
+    willDropParsedChunk(): boolean; // true once pos > bufferWaterline
+    dropParsedChunk(): void;
+    write(chunk: string, isLastChunk: boolean): void; // appends chunk to html
+    insertHtmlAtCurrentPos(chunk: string): void; // splices chunk in directly after the code unit at pos
+    startsWith(pattern: string, caseSensitive: boolean): boolean; // compares html from pos onward against pattern
+    peek(offset: number): number; // non-consuming read of the code unit at pos + offset; CR is reported as LF
+    advance(): number; // consumes and returns the next code point; CR comes back as LF, EOF past the buffer
+    private _checkForProblematicCharacters;
+    retreat(count: number): void; // moves pos back by count, plus one extra step per recorded gap crossed
+}
--- a/book/node_modules/parse5/dist/cjs/tokenizer/preprocessor.js
+++ b/book/node_modules/parse5/dist/cjs/tokenizer/preprocessor.js
@@ -0,0 +1,200 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.Preprocessor = void 0;
+const unicode_js_1 = require("../common/unicode.js");
+const error_codes_js_1 = require("../common/error-codes.js");
+//Constants
+const DEFAULT_BUFFER_WATERLINE = 1 << 16; // 65536; initial bufferWaterline, compared against pos (an index into the html string)
+//Preprocessor
+//NOTE: HTML input preprocessing
+//(see: https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream)
+class Preprocessor { // Buffers HTML text and serves it one code point at a time, tracking line, column and offset
+    constructor(handler) { // handler.onParseError, when set, receives the errors reported through _err()
+        this.handler = handler;
+        this.html = ''; // text written so far that has not been dropped
+        this.pos = -1; // index into html of the code unit last read; advance() increments it before reading
+        // NOTE: Initial `lastGapPos` is -2, to ensure `col` on initialisation is 0
+        this.lastGapPos = -2;
+        this.gapStack = []; // earlier lastGapPos values, popped again by retreat()
+        this.skipNextNewLine = false; // true right after a CR, so that a directly following LF is skipped
+        this.lastChunkWritten = false; // isLastChunk flag of the most recent write()
+        this.endOfChunkHit = false; // set when a read ran past html while more chunks may still arrive
+        this.bufferWaterline = DEFAULT_BUFFER_WATERLINE; // dropParsedChunk() trims only once pos exceeds this
+        this.isEol = false; // the code point just returned was a line break; the next advance() bumps `line`
+        this.lineStartPos = 0; // value of pos at the first code unit of the current line
+        this.droppedBufferSize = 0; // number of code units already cut from the front of html
+        this.line = 1;
+        //NOTE: avoid reporting errors twice on advance/retreat
+        this.lastErrOffset = -1;
+    }
+    /** The column on the current line. If we just saw a gap (eg. a surrogate pair), return the index before. */
+    get col() {
+        return this.pos - this.lineStartPos + Number(this.lastGapPos !== this.pos); // Number(bool) adds 1 unless pos sits exactly on the latest gap
+    }
+    get offset() { // position in the whole input: pos plus what dropParsedChunk() already removed
+        return this.droppedBufferSize + this.pos;
+    }
+    getError(code, cpOffset) { // builds a zero-length error record on the current line, shifted by cpOffset
+        const { line, col, offset } = this;
+        const startCol = col + cpOffset;
+        const startOffset = offset + cpOffset;
+        return {
+            code,
+            startLine: line,
+            endLine: line,
+            startCol,
+            endCol: startCol,
+            startOffset,
+            endOffset: startOffset,
+        };
+    }
+    _err(code) { // reports code via onParseError, skipped when the previous report was at this same offset
+        if (this.handler.onParseError && this.lastErrOffset !== this.offset) {
+            this.lastErrOffset = this.offset;
+            this.handler.onParseError(this.getError(code, 0));
+        }
+    }
+    _addGap() { // saves the previous gap position and marks the current pos as the newest gap
+        this.gapStack.push(this.lastGapPos);
+        this.lastGapPos = this.pos;
+    }
+    _processSurrogate(cp) { // cp is the surrogate code unit at pos; returns the code point advance() should hand out
+        //NOTE: try to peek a surrogate pair
+        if (this.pos !== this.html.length - 1) {
+            const nextCp = this.html.charCodeAt(this.pos + 1);
+            if ((0, unicode_js_1.isSurrogatePair)(nextCp)) {
+                //NOTE: we have a surrogate pair. Peek pair character and recalculate code point.
+                this.pos++;
+                //NOTE: add a gap that should be avoided during retreat
+                this._addGap();
+                return (0, unicode_js_1.getSurrogatePairCodePoint)(cp, nextCp);
+            }
+        }
+        //NOTE: we are at the end of a chunk, therefore we can't infer the surrogate pair yet.
+        else if (!this.lastChunkWritten) {
+            this.endOfChunkHit = true;
+            return unicode_js_1.CODE_POINTS.EOF;
+        }
+        //NOTE: isolated surrogate (reached when the next unit does not complete a pair, or at the end of the last chunk)
+        this._err(error_codes_js_1.ERR.surrogateInInputStream);
+        return cp;
+    }
+    willDropParsedChunk() { // true once pos has moved past bufferWaterline
+        return this.pos > this.bufferWaterline;
+    }
+    dropParsedChunk() { // discards the buffer in front of pos and rebases the pos-relative bookkeeping
+        if (this.willDropParsedChunk()) {
+            this.html = this.html.substring(this.pos); // keeps the code unit at pos and everything after it
+            this.lineStartPos -= this.pos;
+            this.droppedBufferSize += this.pos;
+            this.pos = 0;
+            this.lastGapPos = -2; // gap positions were recorded as old pos values, so the gap history is reset
+            this.gapStack.length = 0;
+        }
+    }
+    write(chunk, isLastChunk) { // appends chunk to the buffer and records whether it is the final one
+        if (this.html.length > 0) {
+            this.html += chunk;
+        }
+        else {
+            this.html = chunk;
+        }
+        this.endOfChunkHit = false;
+        this.lastChunkWritten = isLastChunk;
+    }
+    insertHtmlAtCurrentPos(chunk) { // splices chunk into html directly after the code unit at pos
+        this.html = this.html.substring(0, this.pos + 1) + chunk + this.html.substring(this.pos + 1);
+        this.endOfChunkHit = false;
+    }
+    startsWith(pattern, caseSensitive) { // tests whether html, starting at pos, begins with pattern
+        // Check if our buffer has enough characters
+        if (this.pos + pattern.length > this.html.length) {
+            this.endOfChunkHit = !this.lastChunkWritten;
+            return false;
+        }
+        if (caseSensitive) {
+            return this.html.startsWith(pattern, this.pos);
+        }
+        for (let i = 0; i < pattern.length; i++) {
+            const cp = this.html.charCodeAt(this.pos + i) | 0x20; // sets bit 0x20 (lower-cases ASCII letters); pattern chars lacking that bit can never match
+            if (cp !== pattern.charCodeAt(i)) {
+                return false;
+            }
+        }
+        return true;
+    }
+    peek(offset) { // non-consuming read of the code unit at pos + offset; EOF when that lies past the buffer
+        const pos = this.pos + offset;
+        if (pos >= this.html.length) {
+            this.endOfChunkHit = !this.lastChunkWritten;
+            return unicode_js_1.CODE_POINTS.EOF;
+        }
+        const code = this.html.charCodeAt(pos);
+        return code === unicode_js_1.CODE_POINTS.CARRIAGE_RETURN ? unicode_js_1.CODE_POINTS.LINE_FEED : code; // CR is reported as LF; surrogates are not combined here
+    }
+    advance() { // consumes and returns the next code point; EOF when the buffer is exhausted
+        this.pos++;
+        //NOTE: LF should be in the last column of the line
+        if (this.isEol) {
+            this.isEol = false;
+            this.line++;
+            this.lineStartPos = this.pos;
+        }
+        if (this.pos >= this.html.length) {
+            this.endOfChunkHit = !this.lastChunkWritten;
+            return unicode_js_1.CODE_POINTS.EOF;
+        }
+        let cp = this.html.charCodeAt(this.pos);
+        //NOTE: all U+000D CARRIAGE RETURN (CR) characters must be converted to U+000A LINE FEED (LF) characters
+        if (cp === unicode_js_1.CODE_POINTS.CARRIAGE_RETURN) {
+            this.isEol = true;
+            this.skipNextNewLine = true;
+            return unicode_js_1.CODE_POINTS.LINE_FEED;
+        }
+        //NOTE: any U+000A LINE FEED (LF) characters that immediately follow a U+000D CARRIAGE RETURN (CR) character
+        //must be ignored.
+        if (cp === unicode_js_1.CODE_POINTS.LINE_FEED) {
+            this.isEol = true;
+            if (this.skipNextNewLine) {
+                // `line` will be bumped again in the recursive call.
+                this.line--;
+                this.skipNextNewLine = false;
+                this._addGap(); // the skipped LF becomes a gap so retreat() steps over it
+                return this.advance();
+            }
+        }
+        this.skipNextNewLine = false;
+        if ((0, unicode_js_1.isSurrogate)(cp)) {
+            cp = this._processSurrogate(cp);
+        }
+        //OPTIMIZATION: first check if code point is in the common allowed
+        //range (ASCII alphanumeric, whitespaces, big chunk of BMP)
+        //before going into detailed performance cost validation.
+        const isCommonValidRange = this.handler.onParseError === null || // NOTE(review): strict null test — an undefined handler still goes through the range checks below
+            (cp > 0x1f && cp < 0x7f) ||
+            cp === unicode_js_1.CODE_POINTS.LINE_FEED ||
+            cp === unicode_js_1.CODE_POINTS.CARRIAGE_RETURN ||
+            (cp > 0x9f && cp < 64976); // 64976 === 0xFDD0
+        if (!isCommonValidRange) {
+            this._checkForProblematicCharacters(cp);
+        }
+        return cp;
+    }
+    _checkForProblematicCharacters(cp) { // reports a control-character or noncharacter error for cp, if either helper flags it
+        if ((0, unicode_js_1.isControlCodePoint)(cp)) {
+            this._err(error_codes_js_1.ERR.controlCharacterInInputStream);
+        }
+        else if ((0, unicode_js_1.isUndefinedCodePoint)(cp)) {
+            this._err(error_codes_js_1.ERR.noncharacterInInputStream);
+        }
+    }
+    retreat(count) { // moves pos back by count, plus one extra step for every recorded gap that is crossed
+        this.pos -= count;
+        while (this.pos < this.lastGapPos) { // crossed a gap: restore the previous gap position and step back once more
+            this.lastGapPos = this.gapStack.pop();
+            this.pos--;
+        }
+        this.isEol = false; // NOTE(review): `line` and `lineStartPos` are not rewound here — presumably callers never retreat across an already counted line break; confirm
+    }
+}
+exports.Preprocessor = Preprocessor;