fix
This commit is contained in:
247
book/node_modules/parse5/dist/cjs/tokenizer/index.d.ts
generated
vendored
Normal file
247
book/node_modules/parse5/dist/cjs/tokenizer/index.d.ts
generated
vendored
Normal file
@ -0,0 +1,247 @@
|
||||
import { Preprocessor } from './preprocessor.js';
|
||||
import { type Token, type CharacterToken, type DoctypeToken, type TagToken, type EOFToken, type CommentToken, type Attribute, type Location } from '../common/token.js';
|
||||
import { EntityDecoder } from 'entities/lib/decode.js';
|
||||
import { ERR, type ParserErrorHandler } from '../common/error-codes.js';
|
||||
/**
 * Numeric identifiers for the tokenizer states.
 *
 * Members are numbered consecutively from 0 (`DATA`) to 72 (`AMBIGUOUS_AMPERSAND`).
 * The section comments below only group members by their name prefix; they carry
 * no meaning for the compiler.
 */
declare const enum State {
    // --- Content states (a subset of these is re-exported through `TokenizerMode`) ---
    DATA = 0,
    RCDATA = 1,
    RAWTEXT = 2,
    SCRIPT_DATA = 3,
    PLAINTEXT = 4,

    // --- Tag states ---
    TAG_OPEN = 5,
    END_TAG_OPEN = 6,
    TAG_NAME = 7,

    // --- RCDATA end-tag detection ---
    RCDATA_LESS_THAN_SIGN = 8,
    RCDATA_END_TAG_OPEN = 9,
    RCDATA_END_TAG_NAME = 10,

    // --- RAWTEXT end-tag detection ---
    RAWTEXT_LESS_THAN_SIGN = 11,
    RAWTEXT_END_TAG_OPEN = 12,
    RAWTEXT_END_TAG_NAME = 13,

    // --- Script data, including the escaped and double-escaped variants ---
    SCRIPT_DATA_LESS_THAN_SIGN = 14,
    SCRIPT_DATA_END_TAG_OPEN = 15,
    SCRIPT_DATA_END_TAG_NAME = 16,
    SCRIPT_DATA_ESCAPE_START = 17,
    SCRIPT_DATA_ESCAPE_START_DASH = 18,
    SCRIPT_DATA_ESCAPED = 19,
    SCRIPT_DATA_ESCAPED_DASH = 20,
    SCRIPT_DATA_ESCAPED_DASH_DASH = 21,
    SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 22,
    SCRIPT_DATA_ESCAPED_END_TAG_OPEN = 23,
    SCRIPT_DATA_ESCAPED_END_TAG_NAME = 24,
    SCRIPT_DATA_DOUBLE_ESCAPE_START = 25,
    SCRIPT_DATA_DOUBLE_ESCAPED = 26,
    SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 27,
    SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 28,
    SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 29,
    SCRIPT_DATA_DOUBLE_ESCAPE_END = 30,

    // --- Attributes ---
    BEFORE_ATTRIBUTE_NAME = 31,
    ATTRIBUTE_NAME = 32,
    AFTER_ATTRIBUTE_NAME = 33,
    BEFORE_ATTRIBUTE_VALUE = 34,
    ATTRIBUTE_VALUE_DOUBLE_QUOTED = 35,
    ATTRIBUTE_VALUE_SINGLE_QUOTED = 36,
    ATTRIBUTE_VALUE_UNQUOTED = 37,
    AFTER_ATTRIBUTE_VALUE_QUOTED = 38,
    SELF_CLOSING_START_TAG = 39,

    // --- Comments and markup declarations ---
    BOGUS_COMMENT = 40,
    MARKUP_DECLARATION_OPEN = 41,
    COMMENT_START = 42,
    COMMENT_START_DASH = 43,
    COMMENT = 44,
    COMMENT_LESS_THAN_SIGN = 45,
    COMMENT_LESS_THAN_SIGN_BANG = 46,
    COMMENT_LESS_THAN_SIGN_BANG_DASH = 47,
    COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH = 48,
    COMMENT_END_DASH = 49,
    COMMENT_END = 50,
    COMMENT_END_BANG = 51,

    // --- DOCTYPE ---
    DOCTYPE = 52,
    BEFORE_DOCTYPE_NAME = 53,
    DOCTYPE_NAME = 54,
    AFTER_DOCTYPE_NAME = 55,
    AFTER_DOCTYPE_PUBLIC_KEYWORD = 56,
    BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 57,
    DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 58,
    DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 59,
    AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 60,
    BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 61,
    AFTER_DOCTYPE_SYSTEM_KEYWORD = 62,
    BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 63,
    DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 64,
    DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 65,
    AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 66,
    BOGUS_DOCTYPE = 67,

    // --- CDATA sections ---
    CDATA_SECTION = 68,
    CDATA_SECTION_BRACKET = 69,
    CDATA_SECTION_END = 70,

    // --- Character references ---
    CHARACTER_REFERENCE = 71,
    AMBIGUOUS_AMPERSAND = 72
}
|
||||
/**
 * Publicly exported subset of the internal `State` enum.
 *
 * Each property is typed as the matching `State` member, so the six listed
 * states can be named without access to the non-exported enum itself.
 * Every property is read-only.
 */
export declare const TokenizerMode: Readonly<{
    DATA: State.DATA;
    RCDATA: State.RCDATA;
    RAWTEXT: State.RAWTEXT;
    SCRIPT_DATA: State.SCRIPT_DATA;
    PLAINTEXT: State.PLAINTEXT;
    CDATA_SECTION: State.CDATA_SECTION;
}>;
|
||||
/** Options accepted by the `Tokenizer` constructor. */
export interface TokenizerOptions {
    /**
     * Optional flag.
     * NOTE(review): presumably turns on source-location tracking for emitted
     * tokens (see the `Location`-typed members of `Tokenizer`) — confirm
     * against the implementation.
     */
    sourceCodeLocationInfo?: boolean;
}
|
||||
/**
 * Callback set handed to the `Tokenizer` constructor.
 *
 * There is one required callback per token kind; each takes the token and
 * returns nothing. Character data has three separate callbacks (regular,
 * null and whitespace characters), all of which take a `CharacterToken`.
 */
export interface TokenHandler {
    onComment(token: CommentToken): void;
    onDoctype(token: DoctypeToken): void;
    onStartTag(token: TagToken): void;
    onEndTag(token: TagToken): void;
    onEof(token: EOFToken): void;

    // Character data, split by character class.
    onCharacter(token: CharacterToken): void;
    onNullCharacter(token: CharacterToken): void;
    onWhitespaceCharacter(token: CharacterToken): void;

    /** Optional parse-error sink; may be omitted or explicitly set to `null`. */
    onParseError?: ParserErrorHandler | null;
}
|
||||
/**
 * Type declaration of the HTML tokenizer class.
 *
 * An instance is constructed from a {@link TokenizerOptions} object and a
 * {@link TokenHandler}. Input is supplied through `write()` and
 * `insertHtmlAtCurrentPos()`; `pause()` and `resume()` are also public.
 * The members are grouped below by purpose; this file only declares their
 * shapes, the behaviour lives in the accompanying `index.js`.
 */
export declare class Tokenizer {
    // ------------------------------------------------------------------
    // Fields
    // ------------------------------------------------------------------

    protected options: TokenizerOptions;
    protected handler: TokenHandler;
    preprocessor: Preprocessor;
    protected paused: boolean;
    /** Guard that keeps the parsing loop from being entered more than once at a time. */
    protected inLoop: boolean;
    /**
     * True when the current adjusted node exists, is not an element in the HTML namespace,
     * and is not an integration point for either MathML or HTML.
     *
     * @see {@link https://html.spec.whatwg.org/multipage/parsing.html#tree-construction}
     */
    inForeignNode: boolean;
    lastStartTagName: string;
    active: boolean;
    state: State;
    protected returnState: State;
    /**
     * Character references are parsed with the `EntityDecoder` from `entities`.
     *
     * The `EntityDecoder` takes care of all of the following states:
     *
     * - Named character reference state
     * - Numeric character reference state
     * - Hexadecimal character reference start state
     * - Hexadecimal character reference state
     * - Decimal character reference state
     * - Numeric character reference end state
     */
    protected entityDecoder: EntityDecoder;
    protected entityStartPos: number;
    protected consumedAfterSnapshot: number;
    protected currentLocation: Location | null;
    protected currentCharacterToken: CharacterToken | null;
    protected currentToken: Token | null;
    protected currentAttr: Attribute;

    // ------------------------------------------------------------------
    // Construction, error reporting and the parsing loop
    // ------------------------------------------------------------------

    constructor(options: TokenizerOptions, handler: TokenHandler);
    protected _err(code: ERR, cpOffset?: number): void;
    protected getCurrentLocation(offset: number): Location | null;
    protected _runParsingLoop(): void;

    // ------------------------------------------------------------------
    // Public input / flow-control API
    // ------------------------------------------------------------------

    pause(): void;
    resume(writeCallback?: () => void): void;
    write(chunk: string, isLastChunk: boolean, writeCallback?: () => void): void;
    insertHtmlAtCurrentPos(chunk: string): void;

    // ------------------------------------------------------------------
    // Input consumption helpers
    // ------------------------------------------------------------------

    protected _ensureHibernation(): boolean;
    protected _consume(): number;
    protected _advanceBy(count: number): void;
    protected _consumeSequenceIfMatch(pattern: string, caseSensitive: boolean): boolean;

    // ------------------------------------------------------------------
    // Token and attribute construction
    // ------------------------------------------------------------------

    protected _createStartTagToken(): void;
    protected _createEndTagToken(): void;
    protected _createCommentToken(offset: number): void;
    protected _createDoctypeToken(initialName: string | null): void;
    protected _createCharacterToken(type: CharacterToken['type'], chars: string): void;
    protected _createAttr(attrNameFirstCh: string): void;
    protected _leaveAttrName(): void;
    protected _leaveAttrValue(): void;

    // ------------------------------------------------------------------
    // Token emission
    // ------------------------------------------------------------------

    protected prepareToken(ct: Token): void;
    protected emitCurrentTagToken(): void;
    protected emitCurrentComment(ct: CommentToken): void;
    protected emitCurrentDoctype(ct: DoctypeToken): void;
    protected _emitCurrentCharacterToken(nextLocation: Location | null): void;
    protected _emitEOFToken(): void;
    protected _appendCharToCurrentCharacterToken(type: CharacterToken['type'], ch: string): void;
    protected _emitCodePoint(cp: number): void;
    protected _emitChars(ch: string): void;

    // ------------------------------------------------------------------
    // Character reference helpers
    // ------------------------------------------------------------------

    protected _startCharacterReference(): void;
    protected _isCharacterReferenceInAttribute(): boolean;
    protected _flushCodePointConsumedAsCharacterReference(cp: number): void;

    // ------------------------------------------------------------------
    // State handlers.
    // The `_state*` methods are named after the members of `State`. All of
    // them take the current code point except `_stateCharacterReference`,
    // which is declared without parameters.
    // NOTE(review): `_callState` presumably dispatches to these based on
    // `state` — confirm against index.js.
    // ------------------------------------------------------------------

    protected _callState(cp: number): void;
    protected _stateData(cp: number): void;
    protected _stateRcdata(cp: number): void;
    protected _stateRawtext(cp: number): void;
    protected _stateScriptData(cp: number): void;
    protected _statePlaintext(cp: number): void;
    protected _stateTagOpen(cp: number): void;
    protected _stateEndTagOpen(cp: number): void;
    protected _stateTagName(cp: number): void;
    protected _stateRcdataLessThanSign(cp: number): void;
    protected _stateRcdataEndTagOpen(cp: number): void;
    protected handleSpecialEndTag(_cp: number): boolean;
    protected _stateRcdataEndTagName(cp: number): void;
    protected _stateRawtextLessThanSign(cp: number): void;
    protected _stateRawtextEndTagOpen(cp: number): void;
    protected _stateRawtextEndTagName(cp: number): void;
    protected _stateScriptDataLessThanSign(cp: number): void;
    protected _stateScriptDataEndTagOpen(cp: number): void;
    protected _stateScriptDataEndTagName(cp: number): void;
    protected _stateScriptDataEscapeStart(cp: number): void;
    protected _stateScriptDataEscapeStartDash(cp: number): void;
    protected _stateScriptDataEscaped(cp: number): void;
    protected _stateScriptDataEscapedDash(cp: number): void;
    protected _stateScriptDataEscapedDashDash(cp: number): void;
    protected _stateScriptDataEscapedLessThanSign(cp: number): void;
    protected _stateScriptDataEscapedEndTagOpen(cp: number): void;
    protected _stateScriptDataEscapedEndTagName(cp: number): void;
    protected _stateScriptDataDoubleEscapeStart(cp: number): void;
    protected _stateScriptDataDoubleEscaped(cp: number): void;
    protected _stateScriptDataDoubleEscapedDash(cp: number): void;
    protected _stateScriptDataDoubleEscapedDashDash(cp: number): void;
    protected _stateScriptDataDoubleEscapedLessThanSign(cp: number): void;
    protected _stateScriptDataDoubleEscapeEnd(cp: number): void;
    protected _stateBeforeAttributeName(cp: number): void;
    protected _stateAttributeName(cp: number): void;
    protected _stateAfterAttributeName(cp: number): void;
    protected _stateBeforeAttributeValue(cp: number): void;
    protected _stateAttributeValueDoubleQuoted(cp: number): void;
    protected _stateAttributeValueSingleQuoted(cp: number): void;
    protected _stateAttributeValueUnquoted(cp: number): void;
    protected _stateAfterAttributeValueQuoted(cp: number): void;
    protected _stateSelfClosingStartTag(cp: number): void;
    protected _stateBogusComment(cp: number): void;
    protected _stateMarkupDeclarationOpen(cp: number): void;
    protected _stateCommentStart(cp: number): void;
    protected _stateCommentStartDash(cp: number): void;
    protected _stateComment(cp: number): void;
    protected _stateCommentLessThanSign(cp: number): void;
    protected _stateCommentLessThanSignBang(cp: number): void;
    protected _stateCommentLessThanSignBangDash(cp: number): void;
    protected _stateCommentLessThanSignBangDashDash(cp: number): void;
    protected _stateCommentEndDash(cp: number): void;
    protected _stateCommentEnd(cp: number): void;
    protected _stateCommentEndBang(cp: number): void;
    protected _stateDoctype(cp: number): void;
    protected _stateBeforeDoctypeName(cp: number): void;
    protected _stateDoctypeName(cp: number): void;
    protected _stateAfterDoctypeName(cp: number): void;
    protected _stateAfterDoctypePublicKeyword(cp: number): void;
    protected _stateBeforeDoctypePublicIdentifier(cp: number): void;
    protected _stateDoctypePublicIdentifierDoubleQuoted(cp: number): void;
    protected _stateDoctypePublicIdentifierSingleQuoted(cp: number): void;
    protected _stateAfterDoctypePublicIdentifier(cp: number): void;
    protected _stateBetweenDoctypePublicAndSystemIdentifiers(cp: number): void;
    protected _stateAfterDoctypeSystemKeyword(cp: number): void;
    protected _stateBeforeDoctypeSystemIdentifier(cp: number): void;
    protected _stateDoctypeSystemIdentifierDoubleQuoted(cp: number): void;
    protected _stateDoctypeSystemIdentifierSingleQuoted(cp: number): void;
    protected _stateAfterDoctypeSystemIdentifier(cp: number): void;
    protected _stateBogusDoctype(cp: number): void;
    protected _stateCdataSection(cp: number): void;
    protected _stateCdataSectionBracket(cp: number): void;
    protected _stateCdataSectionEnd(cp: number): void;
    protected _stateCharacterReference(): void;
    protected _stateAmbiguousAmpersand(cp: number): void;
}
|
||||
export {};
|
2714
book/node_modules/parse5/dist/cjs/tokenizer/index.js
generated
vendored
Normal file
2714
book/node_modules/parse5/dist/cjs/tokenizer/index.js
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
36
book/node_modules/parse5/dist/cjs/tokenizer/preprocessor.d.ts
generated
vendored
Normal file
36
book/node_modules/parse5/dist/cjs/tokenizer/preprocessor.d.ts
generated
vendored
Normal file
@ -0,0 +1,36 @@
|
||||
import { ERR, type ParserError, type ParserErrorHandler } from '../common/error-codes.js';
|
||||
/**
 * Type declaration of the input-stream preprocessor used by the tokenizer.
 *
 * It buffers the written HTML chunks, hands out code points one at a time,
 * and tracks the line, column and absolute offset of the read position.
 */
export declare class Preprocessor {
    private handler;
    /** Text currently held in the buffer. */
    html: string;
    /** Index into `html` of the most recently consumed code unit. */
    pos: number;
    private lastGapPos;
    private gapStack;
    private skipNextNewLine;
    /** Value of `isLastChunk` from the most recent `write()` call. */
    lastChunkWritten: boolean;
    /** Set when a read ran past the end of the buffer before the last chunk was written. */
    endOfChunkHit: boolean;
    /** `willDropParsedChunk()` returns true once `pos` is greater than this value. */
    bufferWaterline: number;
    private isEol;
    private lineStartPos;
    /** Number of code units already removed from the front of `html` by `dropParsedChunk()`. */
    droppedBufferSize: number;
    /** Current line number. */
    line: number;

    constructor(handler: {
        onParseError?: ParserErrorHandler | null;
    });

    /** Column on the current line. Right after a gap (e.g. a surrogate pair) the index before it is returned. */
    get col(): number;
    /** Position in the whole input: `droppedBufferSize + pos`. */
    get offset(): number;
    /** Builds a zero-width `ParserError` located `cpOffset` away from the current position. */
    getError(code: ERR, cpOffset: number): ParserError;
    private lastErrOffset;
    private _err;
    private _addGap;
    private _processSurrogate;

    willDropParsedChunk(): boolean;
    /** Discards the buffer content before `pos` when `willDropParsedChunk()` is true. */
    dropParsedChunk(): void;
    /** Appends `chunk` to the buffer and records whether it is the final chunk. */
    write(chunk: string, isLastChunk: boolean): void;
    /** Splices `chunk` into the buffer directly after the current position. */
    insertHtmlAtCurrentPos(chunk: string): void;
    /** Tests whether the buffer content starting at `pos` begins with `pattern`. */
    startsWith(pattern: string, caseSensitive: boolean): boolean;
    /** Returns the code unit `offset` positions ahead without consuming it. */
    peek(offset: number): number;
    /** Consumes and returns the next code point. */
    advance(): number;
    private _checkForProblematicCharacters;
    /** Moves the read position back by `count`, stepping over recorded gaps. */
    retreat(count: number): void;
}
|
200
book/node_modules/parse5/dist/cjs/tokenizer/preprocessor.js
generated
vendored
Normal file
200
book/node_modules/parse5/dist/cjs/tokenizer/preprocessor.js
generated
vendored
Normal file
@ -0,0 +1,200 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.Preprocessor = void 0;
|
||||
const unicode_js_1 = require("../common/unicode.js");
|
||||
const error_codes_js_1 = require("../common/error-codes.js");
|
||||
//Const
/** Default value of `bufferWaterline`: 65536 code units. */
const DEFAULT_BUFFER_WATERLINE = 1 << 16;

//Preprocessor
//NOTE: HTML input preprocessing
//(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream)
/**
 * Buffers the HTML written to it and hands out one code point at a time while
 * tracking line, column and absolute offset.
 *
 * While advancing it:
 *  - turns CR and CR LF into a single LF,
 *  - merges surrogate pairs into one code point,
 *  - reports isolated surrogates, control characters and noncharacters through
 *    `handler.onParseError` (when one is set).
 *
 * Positions that are skipped without producing a code point of their own (the
 * LF of a CR LF, the second half of a surrogate pair) are recorded as "gaps"
 * so that `retreat()` can step over them.
 */
class Preprocessor {
    /**
     * @param handler Object whose optional `onParseError` callback receives parse errors.
     */
    constructor(handler) {
        this.handler = handler;
        this.html = '';
        this.pos = -1;
        // NOTE: the initial `lastGapPos` is -2 so that `col` is 0 right after construction
        this.lastGapPos = -2;
        this.gapStack = [];
        this.skipNextNewLine = false;
        this.lastChunkWritten = false;
        this.endOfChunkHit = false;
        this.bufferWaterline = DEFAULT_BUFFER_WATERLINE;
        this.isEol = false;
        this.lineStartPos = 0;
        this.droppedBufferSize = 0;
        this.line = 1;
        //NOTE: remembered so that an error is not reported twice on advance/retreat
        this.lastErrOffset = -1;
    }

    /** The column on the current line. If we just saw a gap (eg. a surrogate pair), return the index before. */
    get col() {
        const standingOnGap = this.lastGapPos === this.pos;
        return this.pos - this.lineStartPos + (standingOnGap ? 0 : 1);
    }

    /** Position relative to the start of the whole input, including dropped buffer content. */
    get offset() {
        return this.droppedBufferSize + this.pos;
    }

    /**
     * Builds a zero-width error record for `code`.
     *
     * @param code Error code to store in the record.
     * @param cpOffset Distance added to both the current column and the current offset.
     * @returns An object whose start and end positions are identical.
     */
    getError(code, cpOffset) {
        const { line } = this;
        const startCol = this.col + cpOffset;
        const startOffset = this.offset + cpOffset;
        return {
            code,
            startLine: line,
            endLine: line,
            startCol,
            endCol: startCol,
            startOffset,
            endOffset: startOffset,
        };
    }

    /** Reports `code` at the current position, at most once per offset. */
    _err(code) {
        if (!this.handler.onParseError) {
            return;
        }
        const here = this.offset;
        if (this.lastErrOffset === here) {
            return;
        }
        this.lastErrOffset = here;
        this.handler.onParseError(this.getError(code, 0));
    }

    /** Marks the current position as a gap, remembering the previous one on the stack. */
    _addGap() {
        this.gapStack.push(this.lastGapPos);
        this.lastGapPos = this.pos;
    }

    /**
     * Resolves the surrogate `cp` found at the current position.
     *
     * @returns The combined code point when a pair is found, EOF when the pair
     *          may still arrive in a later chunk, otherwise `cp` itself (after
     *          reporting it as an isolated surrogate).
     */
    _processSurrogate(cp) {
        const isLastBufferedUnit = this.pos === this.html.length - 1;
        if (!isLastBufferedUnit) {
            //NOTE: try to peek a surrogate pair
            const nextCp = this.html.charCodeAt(this.pos + 1);
            if ((0, unicode_js_1.isSurrogatePair)(nextCp)) {
                //NOTE: we have a surrogate pair. Consume its second half and recalculate the code point.
                this.pos++;
                //NOTE: add a gap that should be avoided during retreat
                this._addGap();
                return (0, unicode_js_1.getSurrogatePairCodePoint)(cp, nextCp);
            }
        }
        else if (!this.lastChunkWritten) {
            //NOTE: we are at the end of a chunk, therefore we can't infer the surrogate pair yet.
            this.endOfChunkHit = true;
            return unicode_js_1.CODE_POINTS.EOF;
        }
        //NOTE: isolated surrogate
        this._err(error_codes_js_1.ERR.surrogateInInputStream);
        return cp;
    }

    /** True once the read position has moved past `bufferWaterline`. */
    willDropParsedChunk() {
        return this.pos > this.bufferWaterline;
    }

    /**
     * Discards everything before the current position when the waterline has
     * been passed. `offset` and `col` keep their values because
     * `droppedBufferSize` and `lineStartPos` are shifted by the same amount.
     */
    dropParsedChunk() {
        if (!this.willDropParsedChunk()) {
            return;
        }
        const consumed = this.pos;
        this.html = this.html.substring(consumed);
        this.lineStartPos -= consumed;
        this.droppedBufferSize += consumed;
        this.pos = 0;
        // Recorded gaps all refer to the discarded part of the buffer.
        this.lastGapPos = -2;
        this.gapStack.length = 0;
    }

    /**
     * Appends `chunk` to the buffer.
     *
     * @param chunk Text to append.
     * @param isLastChunk Whether no further chunks will follow.
     */
    write(chunk, isLastChunk) {
        this.html = this.html.length > 0 ? this.html + chunk : chunk;
        this.endOfChunkHit = false;
        this.lastChunkWritten = isLastChunk;
    }

    /** Inserts `chunk` into the buffer immediately after the current position. */
    insertHtmlAtCurrentPos(chunk) {
        const splitAt = this.pos + 1;
        this.html = this.html.substring(0, splitAt) + chunk + this.html.substring(splitAt);
        this.endOfChunkHit = false;
    }

    /**
     * Tests whether the buffer, starting at the current position, begins with `pattern`.
     *
     * The case-insensitive comparison ORs every buffered code unit with 0x20
     * before comparing, so `pattern` has to be given in lower case for it to match.
     *
     * @returns false (and flags `endOfChunkHit` unless the last chunk was
     *          written) when fewer than `pattern.length` code units are buffered.
     */
    startsWith(pattern, caseSensitive) {
        // Check if our buffer has enough characters
        if (this.pos + pattern.length > this.html.length) {
            this.endOfChunkHit = !this.lastChunkWritten;
            return false;
        }
        if (caseSensitive) {
            return this.html.startsWith(pattern, this.pos);
        }
        for (let i = 0; i < pattern.length; i++) {
            const folded = this.html.charCodeAt(this.pos + i) | 0x20;
            if (folded !== pattern.charCodeAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the code unit `offset` positions ahead without consuming anything.
     * CR is reported as LF; EOF is returned past the end of the buffer.
     */
    peek(offset) {
        const target = this.pos + offset;
        if (target >= this.html.length) {
            this.endOfChunkHit = !this.lastChunkWritten;
            return unicode_js_1.CODE_POINTS.EOF;
        }
        const unit = this.html.charCodeAt(target);
        if (unit === unicode_js_1.CODE_POINTS.CARRIAGE_RETURN) {
            return unicode_js_1.CODE_POINTS.LINE_FEED;
        }
        return unit;
    }

    /**
     * Consumes the next code point and returns it (EOF when the buffer is exhausted).
     */
    advance() {
        this.pos++;
        //NOTE: LF should be in the last column of the line, so the line
        //counter is only bumped when moving past it.
        if (this.isEol) {
            this.isEol = false;
            this.line++;
            this.lineStartPos = this.pos;
        }
        if (this.pos >= this.html.length) {
            this.endOfChunkHit = !this.lastChunkWritten;
            return unicode_js_1.CODE_POINTS.EOF;
        }
        let cp = this.html.charCodeAt(this.pos);
        //NOTE: all U+000D CARRIAGE RETURN (CR) characters must be converted to U+000A LINE FEED (LF) characters
        if (cp === unicode_js_1.CODE_POINTS.CARRIAGE_RETURN) {
            this.isEol = true;
            this.skipNextNewLine = true;
            return unicode_js_1.CODE_POINTS.LINE_FEED;
        }
        //NOTE: any U+000A LINE FEED (LF) characters that immediately follow a U+000D CARRIAGE RETURN (CR) character
        //must be ignored.
        if (cp === unicode_js_1.CODE_POINTS.LINE_FEED) {
            this.isEol = true;
            if (this.skipNextNewLine) {
                // `line` will be bumped again in the recursive call.
                this.line--;
                this.skipNextNewLine = false;
                this._addGap();
                return this.advance();
            }
        }
        this.skipNextNewLine = false;
        if ((0, unicode_js_1.isSurrogate)(cp)) {
            cp = this._processSurrogate(cp);
        }
        //OPTIMIZATION: first check if code point is in the common allowed
        //range (ASCII alphanumeric, whitespaces, big chunk of BMP)
        //before going into detailed performance cost validation.
        //`== null` also covers a handler without the property at all: nothing
        //could be reported then, so the detailed check would be wasted work.
        const isCommonValidRange = this.handler.onParseError == null ||
            (cp > 0x1f && cp < 0x7f) ||
            cp === unicode_js_1.CODE_POINTS.LINE_FEED ||
            cp === unicode_js_1.CODE_POINTS.CARRIAGE_RETURN ||
            (cp > 0x9f && cp < 0xfdd0);
        if (!isCommonValidRange) {
            this._checkForProblematicCharacters(cp);
        }
        return cp;
    }

    /** Reports `cp` when it is a control character or a noncharacter. */
    _checkForProblematicCharacters(cp) {
        if ((0, unicode_js_1.isControlCodePoint)(cp)) {
            this._err(error_codes_js_1.ERR.controlCharacterInInputStream);
        }
        else if ((0, unicode_js_1.isUndefinedCodePoint)(cp)) {
            this._err(error_codes_js_1.ERR.noncharacterInInputStream);
        }
    }

    /**
     * Moves the read position back by `count`, taking one extra step back for
     * every recorded gap that is crossed on the way.
     */
    retreat(count) {
        this.pos -= count;
        while (this.pos < this.lastGapPos) {
            this.lastGapPos = this.gapStack.pop();
            this.pos--;
        }
        this.isEol = false;
    }
}
|
||||
exports.Preprocessor = Preprocessor;
|
Reference in New Issue
Block a user