This commit is contained in:
2025-05-12 05:38:44 +09:00
parent dced21c3f8
commit 6d78bfa46e
8120 changed files with 1161564 additions and 0 deletions

18
book/node_modules/encoding-sniffer/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,18 @@
Copyright (c) 2022 Felix Boehm <me@feedic.com>
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

68
book/node_modules/encoding-sniffer/README.md generated vendored Normal file
View File

@@ -0,0 +1,68 @@
# encoding-sniffer [![Node.js CI](https://github.com/fb55/encoding-sniffer/actions/workflows/nodejs-test.yml/badge.svg)](https://github.com/fb55/encoding-sniffer/actions/workflows/nodejs-test.yml)
An implementation of the HTML encoding sniffing algorithm, with stream support.
This module wraps around [iconv-lite](https://github.com/ashtuchkin/iconv-lite)
to make decoding buffers and streams incredibly easy.
## Features
- Support for streams
- Support for XML encoding types, including UTF-16 prefixes and
`<?xml encoding="...">`
- Allows decoding streams and buffers with a single function call
## Installation
```bash
npm install encoding-sniffer
```
## Usage
```js
import { DecodeStream, getEncoding, decodeBuffer } from "encoding-sniffer";
/**
* All functions accept an optional options object.
*
* Available options are (with default values):
*/
const options = {
/**
* The maximum number of bytes to sniff. Defaults to `1024`.
*/
maxBytes: 1024,
/**
* The encoding specified by the user. If set, this will only be overridden
* by a Byte Order Mark (BOM).
*/
userEncoding: undefined,
/**
* The encoding specified by the transport layer. If set, this will only be
* overridden by a Byte Order Mark (BOM) or the user encoding.
*/
transportLayerEncodingLabel: undefined,
/**
* The default encoding to use, if no encoding can be detected.
*
* Defaults to `"windows-1252"`.
*/
defaultEncoding: "windows-1252",
};
// Use the `DecodeStream` transform stream to automatically decode
// the contents of a stream as they are read
const decodeStream = new DecodeStream(options);
// Or, use the `getEncoding` function to detect the encoding of a buffer
const encoding = getEncoding(buffer, options);
// Use the `decodeBuffer` function to decode the contents of a buffer
const decodedBuffer = decodeBuffer(buffer, options);
```
## License
This project is licensed under the MIT License. See the [LICENSE](/LICENSE) file
for more information.

View File

@@ -0,0 +1,33 @@
/// <reference types="node" />
/// <reference types="node" />
import { Transform, type TransformCallback } from "node:stream";
import type { SnifferOptions } from "./sniffer.js";
/**
* Sniff the encoding of a buffer, then decode the whole buffer with it.
*
* @param buffer Buffer to be decoded.
* @param options Options for the sniffer; an empty options object is used when omitted.
* @returns The decoded contents of `buffer`, as a string.
*/
export declare function decodeBuffer(buffer: Buffer, options?: SnifferOptions): string;
/**
* Decodes a stream of buffers into a stream of strings.
*
* Incoming chunks are passed to the sniffer and held back until `maxBytes`
* bytes (1024 by default) have been read, or the input ends. The encoding
* reported by the sniffer is then used to create an iconv-lite decode stream,
* which receives all data and whose output is pushed downstream.
*/
export declare class DecodeStream extends Transform {
/** Sniffer that is fed the leading chunks of the stream. */
private readonly sniffer;
/** Chunks held back while sniffing is still in progress. */
private readonly buffers;
/** The iconv decode stream. It is created on first use, once sniffing has finished or the stream is flushed. */
private iconv;
/** Number of bytes to read before settling on an encoding. */
private readonly maxBytes;
/** Number of bytes passed to the sniffer so far. */
private readBytes;
/** @param options Options for the sniffer; `maxBytes` also limits how much data is held back. */
constructor(options?: SnifferOptions);
/** Sniffs and holds back `chunk`, or forwards it to the iconv stream once enough bytes have been read. */
_transform(chunk: Uint8Array, _encoding: string, callback: TransformCallback): void;
/** Returns the iconv decode stream, creating it and replaying the held-back chunks on first use. */
private getIconvStream;
/** Ends the iconv stream, creating it first if the input ended before sniffing finished. */
_flush(callback: TransformCallback): void;
}
export { type SnifferOptions, getEncoding } from "./sniffer.js";
//# sourceMappingURL=index.d.ts.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;AAAA,OAAO,EAAE,SAAS,EAAE,KAAK,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAEhE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAGnD;;;;;;GAMG;AACH,wBAAgB,YAAY,CACxB,MAAM,EAAE,MAAM,EACd,OAAO,GAAE,cAAmB,GAC7B,MAAM,CAER;AAED;;;;;;GAMG;AACH,qBAAa,YAAa,SAAQ,SAAS;IACvC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAU;IAClC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAoB;IAC5C,8FAA8F;IAC9F,OAAO,CAAC,KAAK,CAAuC;IACpD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC;IAC1B,OAAO,CAAC,SAAS,CAAK;gBAEV,OAAO,CAAC,EAAE,cAAc;IAM3B,UAAU,CACf,KAAK,EAAE,UAAU,EACjB,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,iBAAiB,GAC5B,IAAI;IAeP,OAAO,CAAC,cAAc;IAmBb,MAAM,CAAC,QAAQ,EAAE,iBAAiB,GAAG,IAAI;CAGrD;AAED,OAAO,EAAE,KAAK,cAAc,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC"}

View File

@@ -0,0 +1,93 @@
"use strict";
// Compiler-emitted helper that implements `class D extends B` for ES5 output.
// An `__extends` already present on `this` is reused instead of redefining it.
var __extends = (this && this.__extends) || (function () {
// Links `d` to `b` so that static members are inherited. The strategy is chosen
// on the first call and cached by overwriting `extendStatics` itself.
var extendStatics = function (d, b) {
// Preference order: `Object.setPrototypeOf`, then assigning `__proto__` (only
// when the engine honours it), then copying own enumerable properties.
extendStatics = Object.setPrototypeOf ||
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };
return extendStatics(d, b);
};
return function (d, b) {
// Only functions (constructors) or `null` may be extended.
if (typeof b !== "function" && b !== null)
throw new TypeError("Class extends value " + String(b) + " is not a constructor or null");
extendStatics(d, b);
// Intermediate constructor: lets `d.prototype` inherit from `b.prototype`
// without invoking `b`, while keeping `constructor` pointing at `d`.
function __() { this.constructor = d; }
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
};
})();
// Compiler-emitted interop helper for default imports: modules flagged with
// `__esModule` are returned untouched, anything else is wrapped so that the
// value is reachable through `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.getEncoding = exports.DecodeStream = exports.decodeBuffer = void 0;
var node_stream_1 = require("node:stream");
var iconv_lite_1 = __importDefault(require("iconv-lite"));
var sniffer_js_1 = require("./sniffer.js");
/**
 * Detects the encoding of a buffer and returns its decoded contents.
 *
 * @param buffer Buffer to be decoded
 * @param options Options for the sniffer; an empty object is used when omitted
 * @returns The contents of `buffer`, decoded with the detected encoding
 */
function decodeBuffer(buffer, options) {
    var snifferOptions = options === void 0 ? {} : options;
    var detectedEncoding = (0, sniffer_js_1.getEncoding)(buffer, snifferOptions);
    return iconv_lite_1.default.decode(buffer, detectedEncoding);
}
exports.decodeBuffer = decodeBuffer;
/**
 * Transform stream that turns a stream of buffers into a stream of strings.
 *
 * Incoming chunks are fed to the sniffer and held back until `maxBytes` bytes
 * (1024 unless configured otherwise) have been seen. At that point, or when
 * the input ends, an iconv-lite decode stream is created for the encoding the
 * sniffer reports; it receives all data and its output is pushed downstream.
 */
var DecodeStream = /** @class */ (function (_super) {
    __extends(DecodeStream, _super);
    function DecodeStream(options) {
        var _this = _super.call(this, { decodeStrings: false, encoding: "utf-8" }) || this;
        /** Chunks held back while the encoding is still being sniffed. */
        _this.buffers = [];
        /** The iconv decode stream; `null` until it is first needed. */
        _this.iconv = null;
        /** Number of bytes handed to the sniffer so far. */
        _this.readBytes = 0;
        _this.sniffer = new sniffer_js_1.Sniffer(options);
        // Fall back to 1024 bytes when no (or a nullish) `maxBytes` was given.
        var requestedMaxBytes = options == null ? undefined : options.maxBytes;
        _this.maxBytes = requestedMaxBytes == null ? 1024 : requestedMaxBytes;
        return _this;
    }
    DecodeStream.prototype._transform = function (chunk, _encoding, callback) {
        var stillSniffing = this.readBytes < this.maxBytes;
        if (stillSniffing) {
            this.sniffer.write(chunk);
            this.readBytes += chunk.length;
            // This chunk may have taken us to (or past) the sniffing limit.
            stillSniffing = this.readBytes < this.maxBytes;
        }
        if (stillSniffing) {
            this.buffers.push(chunk);
            callback();
            return;
        }
        this.getIconvStream().write(chunk, callback);
    };
    DecodeStream.prototype.getIconvStream = function () {
        var self = this;
        if (!this.iconv) {
            var decoder = iconv_lite_1.default.decodeStream(this.sniffer.encoding);
            decoder.on("data", function (decoded) { return self.push(decoded, "utf-8"); });
            decoder.on("end", function () { return self.push(null); });
            this.iconv = decoder;
            // Replay everything that was held back during sniffing.
            var pending = this.buffers;
            for (var index = 0; index < pending.length; index++) {
                decoder.write(pending[index]);
            }
            pending.length = 0;
        }
        return this.iconv;
    };
    DecodeStream.prototype._flush = function (callback) {
        // Creates the decoder if the input ended before the sniffing limit was reached.
        this.getIconvStream().end(callback);
    };
    return DecodeStream;
}(node_stream_1.Transform));
exports.DecodeStream = DecodeStream;
// Re-export `getEncoding` from the sniffer module. The export is a getter, so
// it always reflects the current binding of the sniffer module.
var sniffer_js_2 = require("./sniffer.js");
Object.defineProperty(exports, "getEncoding", { enumerable: true, get: function () { return sniffer_js_2.getEncoding; } });
//# sourceMappingURL=index.js.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;AAAA,2CAAgE;AAChE,0DAA+B;AAE/B,2CAAoD;AAEpD;;;;;;GAMG;AACH,SAAgB,YAAY,CACxB,MAAc,EACd,OAA4B;IAA5B,wBAAA,EAAA,YAA4B;IAE5B,OAAO,oBAAK,CAAC,MAAM,CAAC,MAAM,EAAE,IAAA,wBAAW,EAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;AAC9D,CAAC;AALD,oCAKC;AAED;;;;;;GAMG;AACH;IAAkC,gCAAS;IAQvC,sBAAY,OAAwB;;QAChC,YAAA,MAAK,YAAC,EAAE,aAAa,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,SAAC;QAPtC,aAAO,GAAiB,EAAE,CAAC;QAC5C,8FAA8F;QACtF,WAAK,GAAkC,IAAI,CAAC;QAE5C,eAAS,GAAG,CAAC,CAAC;QAIlB,KAAI,CAAC,OAAO,GAAG,IAAI,oBAAO,CAAC,OAAO,CAAC,CAAC;QACpC,KAAI,CAAC,QAAQ,GAAG,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,QAAQ,mCAAI,IAAI,CAAC;;IAC9C,CAAC;IAEQ,iCAAU,GAAnB,UACI,KAAiB,EACjB,SAAiB,EACjB,QAA2B;QAE3B,IAAI,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;YACjC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YAC1B,IAAI,CAAC,SAAS,IAAI,KAAK,CAAC,MAAM,CAAC;YAE/B,IAAI,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACjC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACzB,QAAQ,EAAE,CAAC;gBACX,OAAO;YACX,CAAC;QACL,CAAC;QAED,IAAI,CAAC,cAAc,EAAE,CAAC,KAAK,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;IACjD,CAAC;IAEO,qCAAc,GAAtB;QAAA,iBAiBC;QAhBG,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACb,OAAO,IAAI,CAAC,KAAK,CAAC;QACtB,CAAC;QAED,IAAM,MAAM,GAAG,oBAAK,CAAC,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACzD,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,UAAC,KAAa,IAAK,OAAA,KAAI,CAAC,IAAI,CAAC,KAAK,EAAE,OAAO,CAAC,EAAzB,CAAyB,CAAC,CAAC;QAChE,MAAM,CAAC,EAAE,CAAC,KAAK,EAAE,cAAM,OAAA,KAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAf,CAAe,CAAC,CAAC;QAExC,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC;QAEpB,KAAqB,UAAY,EAAZ,KAAA,IAAI,CAAC,OAAO,EAAZ,cAAY,EAAZ,IAAY,EAAE,CAAC;YAA/B,IAAM,MAAM,SAAA;YACb,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACzB,CAAC;QACD,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;QAExB,OAAO,MAAM,CAAC;IAClB,CAAC;IAEQ,6BAAM,GAAf,UAAgB,QAA2B;QACvC,IAAI,CAAC,cAAc,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IACxC,CAAC;IACL,mBAAC;AAAD,CAAC,AAvDD,CAAkC,uBAAS,GAuD1C;AAvDY,oCAAY;AAyDzB,2CAAgE;AAAlC,yGAAA,WAAW,
OAAA"}

View File

@@ -0,0 +1,3 @@
{
"type": "commonjs"
}

View File

@@ -0,0 +1,148 @@
/**
* Identifies the source of the sniffer's current encoding result.
*
* A result is only replaced while it is still `DEFAULT`, or by a result
* whose type has a lower numeric value.
*/
export declare enum ResultType {
/** Byte order mark. */
BOM = 0,
/** User- or transport layer-defined. */
PASSED = 1,
/** XML prefixes. */
XML_PREFIX = 2,
/** Meta tag. */
META_TAG = 3,
/** XML encoding. */
XML_ENCODING = 4,
/** Default. */
DEFAULT = 5
}
/** Byte sequences that the sniffer compares its input against. */
export declare const STRINGS: {
/** Byte order marks. */
UTF8_BOM: Uint8Array;
UTF16LE_BOM: Uint8Array;
UTF16BE_BOM: Uint8Array;
/** The characters `<?x`, encoded as UTF-16LE and UTF-16BE respectively. */
UTF16LE_XML_PREFIX: Uint8Array;
UTF16BE_XML_PREFIX: Uint8Array;
/** The bytes of `<?xml`. */
XML_DECLARATION: Uint8Array;
/** Names and values matched inside XML declarations and `meta` tags. */
ENCODING: Uint8Array;
META: Uint8Array;
HTTP_EQUIV: Uint8Array;
CONTENT: Uint8Array;
CONTENT_TYPE: Uint8Array;
CHARSET: Uint8Array;
/** The bytes of `<!--` and `-->`. */
COMMENT_START: Uint8Array;
COMMENT_END: Uint8Array;
};
/** Options accepted by `Sniffer`, `getEncoding`, `decodeBuffer` and `DecodeStream`. */
export interface SnifferOptions {
/**
* The maximum number of bytes to sniff.
*
* @default 1024
*/
maxBytes?: number;
/**
* The encoding specified by the user. If set, this will only be overridden
* by a Byte Order Mark (BOM).
*/
userEncoding?: string;
/**
* The encoding specified by the transport layer. If set, this will only be
* overridden by a Byte Order Mark (BOM) or the user encoding.
*/
transportLayerEncodingLabel?: string;
/**
* The default encoding to use, if no encoding can be detected.
*
* @default "windows-1252"
*/
defaultEncoding?: string;
}
/**
* Byte-by-byte state machine that determines the encoding of a document.
*
* The current result is exposed through `encoding` and `resultType`.
* NOTE(review): data is presumably supplied through `write` in one or more
* chunks — confirm against the implementation.
*/
export declare class Sniffer {
/** The maximum number of bytes to sniff. */
private readonly maxBytes;
/** The offset of the previous buffers. */
private offset;
/** The current state of the state machine. */
private state;
/** Index of the next byte to match in the byte sequence currently being compared. */
private sectionIndex;
/** Which of the tracked `meta` attributes (`http-equiv`, `content`, `charset`) is being parsed, if any. */
private attribType;
/**
* Indicates if the `http-equiv` is `content-type`.
*
* Initially `null`, a boolean when a value is found.
*/
private gotPragma;
private needsPragma;
/** Whether the tag currently being scanned is a `meta` tag. */
private inMetaTag;
/** The encoding determined so far; starts out as `windows-1252`. */
encoding: string;
/** The source of the current `encoding`; starts out as `ResultType.DEFAULT`. */
resultType: ResultType;
/**
* Records an encoding label together with its source. The label is ignored
* if it does not map to a known encoding, or if the current result is not
* `DEFAULT` and does not have a higher `ResultType` value than the new one.
*/
private setResult;
/**
* Applies, in this order, the user encoding, the transport layer encoding
* and the default encoding, each only where `setResult` accepts it.
*/
constructor({ maxBytes, userEncoding, transportLayerEncodingLabel, defaultEncoding, }?: SnifferOptions);
private stateBegin;
private stateBeginLT;
private stateUTF16BE_XML_PREFIX;
private stateUTF16LE_XML_PREFIX;
private stateBOM16LE;
private stateBOM16BE;
private stateBOM8;
private stateBeforeTag;
/**
* We have seen a `<`, and now have to figure out what to do.
*
* Options:
* - `<meta`
* - Any other tag
* - A closing tag
* - `<!--`
* - An XML declaration
*
*/
private stateBeforeTagName;
private stateBeforeCloseTagName;
private stateCommentStart;
private stateCommentEnd;
/**
* Any section starting with `<!`, `<?`, `</`, without being a closing tag or comment.
*/
private stateWeirdTag;
/**
* Advances the section, ignoring upper/lower case.
*
* Make sure the section has left-over characters before calling.
*
* @returns `false` if we did not match the section.
*/
private advanceSectionIC;
/**
* Advances the section.
*
* Make sure the section has left-over characters before calling.
*
* @returns `false` if we did not match the section.
*/
private advanceSection;
private stateTagNameMeta;
private stateTagNameOther;
private stateBeforeAttribute;
private handleMetaAttrib;
private stateMetaAttribHttpEquiv;
private stateMetaAttribC;
private stateMetaAttribCharset;
private stateMetaAttribContent;
private stateMetaAttribAfterName;
private stateAnyAttribName;
private stateAfterAttributeName;
/** The quote character that opened the attribute value currently being read. */
private quoteCharacter;
private readonly attributeValue;
private stateBeforeAttributeValue;
private stateMetaAttribHttpEquivValue;
private handleMetaContentValue;
private handleAttributeValue;
private stateAttributeValueUnquoted;
private findMetaContentEncoding;
private stateMetaContentValueUnquotedBeforeEncoding;
private stateMetaContentValueUnquotedBeforeValue;
private stateMetaContentValueUnquotedValueQuoted;
private stateMetaContentValueUnquotedValueUnquoted;
private stateMetaContentValueQuotedValueUnquoted;
private stateMetaContentValueQuotedValueQuoted;
private stateMetaContentValueQuotedBeforeEncoding;
private stateMetaContentValueQuotedAfterEncoding;
private stateMetaContentValueQuotedBeforeValue;
private stateAttributeValueQuoted;
private stateXMLDeclaration;
private stateXMLDeclarationBeforeEncoding;
private stateXMLDeclarationAfterEncoding;
private stateXMLDeclarationBeforeValue;
private stateXMLDeclarationValue;
/**
* Feeds a chunk of bytes to the sniffer.
* NOTE(review): presumably updates `encoding` and `resultType` as a side
* effect — confirm against the implementation.
*/
write(buffer: Uint8Array): void;
}
/**
* Get the encoding for the passed buffer.
*
* @param buffer Bytes to sniff.
* @param options Options for the sniffer.
* @returns The detected encoding, as a string (presumably the encoding name
* that ends up in `Sniffer#encoding` — confirm against the implementation).
*/
export declare function getEncoding(buffer: Uint8Array, options?: SnifferOptions): string;
//# sourceMappingURL=sniffer.d.ts.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"sniffer.d.ts","sourceRoot":"","sources":["../../src/sniffer.ts"],"names":[],"mappings":"AAuEA,oBAAY,UAAU;IAElB,GAAG,IAAI;IAEP,MAAM,IAAI;IAEV,UAAU,IAAI;IAEd,QAAQ,IAAI;IAEZ,YAAY,IAAI;IAEhB,OAAO,IAAI;CACd;AAgDD,eAAO,MAAM,OAAO;;;;;;;;;;;;;;;CAenB,CAAC;AAaF,MAAM,WAAW,cAAc;IAC3B;;;;OAIG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;OAEG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB;;OAEG;IACH,2BAA2B,CAAC,EAAE,MAAM,CAAC;IACrC;;;;OAIG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,qBAAa,OAAO;IAChB,4CAA4C;IAC5C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,0CAA0C;IAC1C,OAAO,CAAC,MAAM,CAAK;IAEnB,OAAO,CAAC,KAAK,CAAe;IAC5B,OAAO,CAAC,YAAY,CAAK;IACzB,OAAO,CAAC,UAAU,CAAmB;IACrC;;;;OAIG;IACH,OAAO,CAAC,SAAS,CAAwB;IACzC,OAAO,CAAC,WAAW,CAAuB;IAE1C,OAAO,CAAC,SAAS,CAAS;IAEnB,QAAQ,SAAkB;IAC1B,UAAU,aAAsB;IAEvC,OAAO,CAAC,SAAS;gBAsBL,EACR,QAAe,EACf,YAAY,EACZ,2BAA2B,EAC3B,eAAe,GAClB,GAAE,cAAmB;IAetB,OAAO,CAAC,UAAU;IAmClB,OAAO,CAAC,YAAY;IAapB,OAAO,CAAC,uBAAuB;IAa/B,OAAO,CAAC,uBAAuB;IAa/B,OAAO,CAAC,YAAY;IASpB,OAAO,CAAC,YAAY;IASpB,OAAO,CAAC,SAAS;IASjB,OAAO,CAAC,cAAc;IAOtB;;;;;;;;;;OAUG;IACH,OAAO,CAAC,kBAAkB;IAiC1B,OAAO,CAAC,uBAAuB;IAO/B,OAAO,CAAC,iBAAiB;IAazB,OAAO,CAAC,eAAe;IAcvB;;OAEG;IACH,OAAO,CAAC,aAAa;IAMrB;;;;;;OAMG;IACH,OAAO,CAAC,gBAAgB;IAIxB;;;;;;OAMG;IACH,OAAO,CAAC,cAAc;IAUtB,OAAO,CAAC,gBAAgB;IAkBxB,OAAO,CAAC,iBAAiB;IAQzB,OAAO,CAAC,oBAAoB;IAsB5B,OAAO,CAAC,gBAAgB;IAgBxB,OAAO,CAAC,wBAAwB;IAIhC,OAAO,CAAC,gBAAgB;IAcxB,OAAO,CAAC,sBAAsB;IAI9B,OAAO,CAAC,sBAAsB;IAI9B,OAAO,CAAC,wBAAwB;IAUhC,OAAO,CAAC,kBAAkB;IAW1B,OAAO,CAAC,uBAAuB;IAW/B,OAAO,CAAC,cAAc,CAAK;IAC3B,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAgB;IAE/C,OAAO,CAAC,yBAAyB;IA8BjC,OAAO,CAAC,6BAA6B;IA+BrC,OAAO,CAAC,sBAAsB;IAe9B,OAAO,CAAC,oBAAoB;IAS5B,OAAO,CAAC,2BAA2B;IAYnC,OAAO,CAAC,uBAAuB;IAY/B,OAAO,CAAC,2CAA2C;IAYnD,OAAO,CAAC,wCAAwC;IAahD,OAAO,CAAC,wCAAwC;IAYhD,OAAO,CAAC,0CAA0C;IAUlD,OAAO,CAAC,wCAAwC;IAWhD,OAAO,CAAC,sCAAsC;IAgB9C,OAAO,CAAC,yCAAyC;IAQjD,OAAO,CAAC,wCAAwC;IAUhD,OAAO,CAAC,sCAAsC;IAW9C,OAAO,CAAC,yBAAyB;IAUjC,OAAO,CAAC,mBAAmB;IAW3B,OAAO,CAAC,iCAAiC;IAazC,OA
AO,CAAC,gCAAgC;IASxC,OAAO,CAAC,8BAA8B;IAUtC,OAAO,CAAC,wBAAwB;IAgBzB,KAAK,CAAC,MAAM,EAAE,UAAU,GAAG,IAAI;CAoOzC;AAED,8CAA8C;AAC9C,wBAAgB,WAAW,CACvB,MAAM,EAAE,UAAU,EAClB,OAAO,CAAC,EAAE,cAAc,GACzB,MAAM,CAIR"}

View File

@@ -0,0 +1,992 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.getEncoding = exports.Sniffer = exports.STRINGS = exports.ResultType = void 0;
var whatwg_encoding_1 = require("whatwg-encoding");
// https://html.spec.whatwg.org/multipage/syntax.html#prescan-a-byte-stream-to-determine-its-encoding
/**
 * States of the sniffer's state machine.
 *
 * Values are assigned sequentially from 0 in the order listed below, and each
 * value maps back to its name (`State[0] === "Begin"`).
 */
var State;
(function (State) {
    [
        // Before anything starts; can be any of BOM, UTF-16 XML declarations or meta tags
        "Begin",
        // Inside of a BOM
        "BOM16BE",
        "BOM16LE",
        "BOM8",
        // XML prefix
        "UTF16LE_XML_PREFIX",
        "BeginLT",
        "UTF16BE_XML_PREFIX",
        // Waiting for opening `<`
        "BeforeTag",
        // After the opening `<`
        "BeforeTagName",
        // After `</`
        "BeforeCloseTagName",
        // Beginning of a comment
        "CommentStart",
        // End of a comment
        "CommentEnd",
        // A tag name that could be `meta`
        "TagNameMeta",
        // A tag name that is not `meta`
        "TagNameOther",
        // XML declaration
        "XMLDeclaration",
        "XMLDeclarationBeforeEncoding",
        "XMLDeclarationAfterEncoding",
        "XMLDeclarationBeforeValue",
        "XMLDeclarationValue",
        // Anything that looks like a tag, but doesn't fit in the above categories
        "WeirdTag",
        "BeforeAttribute",
        /*
         * Attributes in meta tag — we compare them to our set here, and back out
         * We care about four attributes: http-equiv, content-type, content, charset
         */
        "MetaAttribHttpEquiv",
        // The value has to be `content-type`
        "MetaAttribHttpEquivValue",
        "MetaAttribC",
        "MetaAttribContent",
        "MetaAttribCharset",
        // Waiting for whitespace
        "MetaAttribAfterName",
        "MetaContentValueQuotedBeforeEncoding",
        "MetaContentValueQuotedAfterEncoding",
        "MetaContentValueQuotedBeforeValue",
        "MetaContentValueQuotedValueQuoted",
        "MetaContentValueQuotedValueUnquoted",
        "MetaContentValueUnquotedBeforeEncoding",
        "MetaContentValueUnquotedBeforeValue",
        "MetaContentValueUnquotedValueQuoted",
        "MetaContentValueUnquotedValueUnquoted",
        "AnyAttribName",
        // After the name of an attribute, before the equals sign
        "AfterAttributeName",
        // After `=`
        "BeforeAttributeValue",
        "AttributeValueQuoted",
        "AttributeValueUnquoted",
    ].forEach(function (name, value) {
        // Forward (name -> value) and reverse (value -> name) mapping.
        State[State[name] = value] = name;
    });
})(State || (State = {}));
/**
 * Source of an encoding result. Values are assigned sequentially from 0 in
 * the order listed, and each value maps back to its name.
 */
var ResultType;
(function (ResultType) {
    [
        // Byte order mark
        "BOM",
        // User- or transport layer-defined
        "PASSED",
        // XML prefixes
        "XML_PREFIX",
        // Meta tag
        "META_TAG",
        // XML encoding
        "XML_ENCODING",
        // Default
        "DEFAULT",
    ].forEach(function (name, value) {
        ResultType[ResultType[name] = value] = name;
    });
})(ResultType || (exports.ResultType = ResultType = {}));
/**
 * The `meta` attribute currently being parsed, if it is one of the tracked
 * ones. Values are assigned sequentially from 0 and map back to their names.
 */
var AttribType;
(function (AttribType) {
    var names = ["None", "HttpEquiv", "Content", "Charset"];
    for (var value = 0; value < names.length; value++) {
        AttribType[AttribType[names[value]] = value] = names[value];
    }
})(AttribType || (AttribType = {}));
/**
 * Character codes the sniffer compares bytes against. Each name maps to its
 * code, and each code maps back to its name.
 */
var Chars;
(function (Chars) {
    var table = [
        ["NIL", 0],
        ["TAB", 9],
        ["LF", 10],
        ["CR", 13],
        ["SPACE", 32],
        ["EXCLAMATION", 33],
        ["DQUOTE", 34],
        ["SQUOTE", 39],
        ["DASH", 45],
        ["SLASH", 47],
        ["SEMICOLON", 59],
        ["LT", 60],
        ["EQUALS", 61],
        ["GT", 62],
        ["QUESTION", 63],
        ["UpperA", 65],
        ["UpperZ", 90],
        ["LowerA", 97],
        ["LowerZ", 122],
    ];
    table.forEach(function (entry) {
        var name = entry[0];
        var code = entry[1];
        Chars[Chars[name] = code] = name;
    });
})(Chars || (Chars = {}));
/** Whitespace bytes: space, line feed, carriage return and tab. */
var SPACE_CHARACTERS = new Set([
    Chars.SPACE,
    Chars.LF,
    Chars.CR,
    Chars.TAB,
]);
/** Bytes that end an unquoted attribute value: any whitespace byte, or `>`. */
var END_OF_UNQUOTED_ATTRIBUTE_VALUE = new Set(Array.from(SPACE_CHARACTERS).concat([Chars.GT]));
/**
 * Converts a string into a byte array with one element per UTF-16 code unit.
 * Code units that do not fit into a byte are reduced modulo 256 by the typed
 * array.
 */
function toUint8Array(str) {
    var length = str.length;
    var bytes = new Uint8Array(length);
    var index = 0;
    while (index < length) {
        bytes[index] = str.charCodeAt(index);
        index++;
    }
    return bytes;
}
/** Byte sequences that the sniffer compares its input against. */
exports.STRINGS = {
// Byte order marks.
UTF8_BOM: new Uint8Array([0xef, 0xbb, 0xbf]),
UTF16LE_BOM: new Uint8Array([0xff, 0xfe]),
UTF16BE_BOM: new Uint8Array([0xfe, 0xff]),
// The characters `<?x` as two-byte units: low byte first (LE), then high byte first (BE).
UTF16LE_XML_PREFIX: new Uint8Array([0x3c, 0x0, 0x3f, 0x0, 0x78, 0x0]),
UTF16BE_XML_PREFIX: new Uint8Array([0x0, 0x3c, 0x0, 0x3f, 0x0, 0x78]),
// Markup keywords, stored as one byte per character.
XML_DECLARATION: toUint8Array("<?xml"),
ENCODING: toUint8Array("encoding"),
META: toUint8Array("meta"),
HTTP_EQUIV: toUint8Array("http-equiv"),
CONTENT: toUint8Array("content"),
CONTENT_TYPE: toUint8Array("content-type"),
CHARSET: toUint8Array("charset"),
COMMENT_START: toUint8Array("<!--"),
COMMENT_END: toUint8Array("-->"),
};
/** Returns `true` if `c` is the character code of an ASCII letter (`A`-`Z` or `a`-`z`). */
function isAsciiAlpha(c) {
    if (c >= Chars.UpperA && c <= Chars.UpperZ) {
        return true;
    }
    return c >= Chars.LowerA && c <= Chars.LowerZ;
}
/** Returns `true` if `c` is the character code of a double or single quote. */
function isQuote(c) {
    switch (c) {
        case Chars.DQUOTE:
        case Chars.SQUOTE:
            return true;
        default:
            return false;
    }
}
var Sniffer = /** @class */ (function () {
function Sniffer(_a) {
var _b = _a === void 0 ? {} : _a, _c = _b.maxBytes, maxBytes = _c === void 0 ? 1024 : _c, userEncoding = _b.userEncoding, transportLayerEncodingLabel = _b.transportLayerEncodingLabel, defaultEncoding = _b.defaultEncoding;
/** The offset of the previous buffers. */
this.offset = 0;
this.state = State.Begin;
this.sectionIndex = 0;
this.attribType = AttribType.None;
/**
* Indicates if the `http-equiv` is `content-type`.
*
* Initially `null`, a boolean when a value is found.
*/
this.gotPragma = null;
this.needsPragma = null;
this.inMetaTag = false;
this.encoding = "windows-1252";
this.resultType = ResultType.DEFAULT;
this.quoteCharacter = 0;
this.attributeValue = [];
this.maxBytes = maxBytes;
if (userEncoding) {
this.setResult(userEncoding, ResultType.PASSED);
}
if (transportLayerEncodingLabel) {
this.setResult(transportLayerEncodingLabel, ResultType.PASSED);
}
if (defaultEncoding) {
this.setResult(defaultEncoding, ResultType.DEFAULT);
}
}
Sniffer.prototype.setResult = function (label, type) {
if (this.resultType === ResultType.DEFAULT || this.resultType > type) {
var encoding = (0, whatwg_encoding_1.labelToName)(label);
if (encoding) {
this.encoding =
// Check if we are in a meta tag and the encoding is `x-user-defined`
type === ResultType.META_TAG &&
encoding === "x-user-defined"
? "windows-1252"
: // Check if we are in a meta tag or xml declaration, and the encoding is UTF-16
(type === ResultType.META_TAG ||
type === ResultType.XML_ENCODING) &&
(encoding === "UTF-16LE" || encoding === "UTF-16BE")
? "UTF-8"
: encoding;
this.resultType = type;
}
}
};
Sniffer.prototype.stateBegin = function (c) {
switch (c) {
case exports.STRINGS.UTF16BE_BOM[0]: {
this.state = State.BOM16BE;
break;
}
case exports.STRINGS.UTF16LE_BOM[0]: {
this.state = State.BOM16LE;
break;
}
case exports.STRINGS.UTF8_BOM[0]: {
this.sectionIndex = 1;
this.state = State.BOM8;
break;
}
case Chars.NIL: {
this.state = State.UTF16BE_XML_PREFIX;
this.sectionIndex = 1;
break;
}
case Chars.LT: {
this.state = State.BeginLT;
break;
}
default: {
this.state = State.BeforeTag;
}
}
};
Sniffer.prototype.stateBeginLT = function (c) {
if (c === Chars.NIL) {
this.state = State.UTF16LE_XML_PREFIX;
this.sectionIndex = 2;
}
else if (c === Chars.QUESTION) {
this.state = State.XMLDeclaration;
this.sectionIndex = 2;
}
else {
this.state = State.BeforeTagName;
this.stateBeforeTagName(c);
}
};
Sniffer.prototype.stateUTF16BE_XML_PREFIX = function (c) {
// Advance position in the section
if (this.advanceSection(exports.STRINGS.UTF16BE_XML_PREFIX, c)) {
if (this.sectionIndex === exports.STRINGS.UTF16BE_XML_PREFIX.length) {
// We have the whole prefix
this.setResult("utf-16be", ResultType.XML_PREFIX);
}
}
else {
this.state = State.BeforeTag;
this.stateBeforeTag(c);
}
};
Sniffer.prototype.stateUTF16LE_XML_PREFIX = function (c) {
// Advance position in the section
if (this.advanceSection(exports.STRINGS.UTF16LE_XML_PREFIX, c)) {
if (this.sectionIndex === exports.STRINGS.UTF16LE_XML_PREFIX.length) {
// We have the whole prefix
this.setResult("utf-16le", ResultType.XML_PREFIX);
}
}
else {
this.state = State.BeforeTag;
this.stateBeforeTag(c);
}
};
Sniffer.prototype.stateBOM16LE = function (c) {
if (c === exports.STRINGS.UTF16LE_BOM[1]) {
this.setResult("utf-16le", ResultType.BOM);
}
else {
this.state = State.BeforeTag;
this.stateBeforeTag(c);
}
};
Sniffer.prototype.stateBOM16BE = function (c) {
if (c === exports.STRINGS.UTF16BE_BOM[1]) {
this.setResult("utf-16be", ResultType.BOM);
}
else {
this.state = State.BeforeTag;
this.stateBeforeTag(c);
}
};
Sniffer.prototype.stateBOM8 = function (c) {
if (this.advanceSection(exports.STRINGS.UTF8_BOM, c) &&
this.sectionIndex === exports.STRINGS.UTF8_BOM.length) {
this.setResult("utf-8", ResultType.BOM);
}
};
Sniffer.prototype.stateBeforeTag = function (c) {
if (c === Chars.LT) {
this.state = State.BeforeTagName;
this.inMetaTag = false;
}
};
/**
* We have seen a `<`, and now have to figure out what to do.
*
* Options:
* - `<meta`
* - Any other tag
* - A closing tag
* - `<!--`
* - An XML declaration
*
*/
Sniffer.prototype.stateBeforeTagName = function (c) {
if (isAsciiAlpha(c)) {
if ((c | 0x20) === exports.STRINGS.META[0]) {
this.sectionIndex = 1;
this.state = State.TagNameMeta;
}
else {
this.state = State.TagNameOther;
}
}
else
switch (c) {
case Chars.SLASH: {
this.state = State.BeforeCloseTagName;
break;
}
case Chars.EXCLAMATION: {
this.state = State.CommentStart;
this.sectionIndex = 2;
break;
}
case Chars.QUESTION: {
this.state = State.WeirdTag;
break;
}
default: {
this.state = State.BeforeTag;
this.stateBeforeTag(c);
}
}
};
Sniffer.prototype.stateBeforeCloseTagName = function (c) {
this.state = isAsciiAlpha(c)
? // Switch to `TagNameOther`; the HTML spec allows attributes here as well.
State.TagNameOther
: State.WeirdTag;
};
Sniffer.prototype.stateCommentStart = function (c) {
if (this.advanceSection(exports.STRINGS.COMMENT_START, c)) {
if (this.sectionIndex === exports.STRINGS.COMMENT_START.length) {
this.state = State.CommentEnd;
// The -- of the comment start can be part of the end.
this.sectionIndex = 2;
}
}
else {
this.state = State.WeirdTag;
this.stateWeirdTag(c);
}
};
Sniffer.prototype.stateCommentEnd = function (c) {
if (this.advanceSection(exports.STRINGS.COMMENT_END, c)) {
if (this.sectionIndex === exports.STRINGS.COMMENT_END.length) {
this.state = State.BeforeTag;
}
}
else if (c === Chars.DASH) {
/*
* If we are here, we know we expected a `>` above.
* Set this to 2, to support many dashes before the closing `>`.
*/
this.sectionIndex = 2;
}
};
/**
* Any section starting with `<!`, `<?`, `</`, without being a closing tag or comment.
*/
Sniffer.prototype.stateWeirdTag = function (c) {
if (c === Chars.GT) {
this.state = State.BeforeTag;
}
};
/**
* Advances the section, ignoring upper/lower case.
*
* Make sure the section has left-over characters before calling.
*
* @returns `false` if we did not match the section.
*/
Sniffer.prototype.advanceSectionIC = function (section, c) {
return this.advanceSection(section, c | 0x20);
};
/**
* Advances the section.
*
* Make sure the section has left-over characters before calling.
*
* @returns `false` if we did not match the section.
*/
Sniffer.prototype.advanceSection = function (section, c) {
if (section[this.sectionIndex] === c) {
this.sectionIndex++;
return true;
}
this.sectionIndex = 0;
return false;
};
Sniffer.prototype.stateTagNameMeta = function (c) {
if (this.sectionIndex < exports.STRINGS.META.length) {
if (this.advanceSectionIC(exports.STRINGS.META, c)) {
return;
}
}
else if (SPACE_CHARACTERS.has(c)) {
this.inMetaTag = true;
this.gotPragma = null;
this.needsPragma = null;
this.state = State.BeforeAttribute;
return;
}
this.state = State.TagNameOther;
// Reconsume in case there is a `>`.
this.stateTagNameOther(c);
};
Sniffer.prototype.stateTagNameOther = function (c) {
if (SPACE_CHARACTERS.has(c)) {
this.state = State.BeforeAttribute;
}
else if (c === Chars.GT) {
this.state = State.BeforeTag;
}
};
Sniffer.prototype.stateBeforeAttribute = function (c) {
if (SPACE_CHARACTERS.has(c))
return;
if (this.inMetaTag) {
var lower = c | 0x20;
if (lower === exports.STRINGS.HTTP_EQUIV[0]) {
this.sectionIndex = 1;
this.state = State.MetaAttribHttpEquiv;
return;
}
else if (lower === exports.STRINGS.CHARSET[0]) {
this.sectionIndex = 1;
this.state = State.MetaAttribC;
return;
}
}
this.state =
c === Chars.SLASH || c === Chars.GT
? State.BeforeTag
: State.AnyAttribName;
};
Sniffer.prototype.handleMetaAttrib = function (c, section, type) {
if (this.advanceSectionIC(section, c)) {
if (this.sectionIndex === section.length) {
this.attribType = type;
this.state = State.MetaAttribAfterName;
}
}
else {
this.state = State.AnyAttribName;
this.stateAnyAttribName(c);
}
};
Sniffer.prototype.stateMetaAttribHttpEquiv = function (c) {
this.handleMetaAttrib(c, exports.STRINGS.HTTP_EQUIV, AttribType.HttpEquiv);
};
/**
 * Handles the second character of an attribute name that started with `c`,
 * which decides between `charset` and `content`.
 */
Sniffer.prototype.stateMetaAttribC = function (c) {
    var folded = c | 0x20;
    var nextState;
    if (folded === exports.STRINGS.CHARSET[1]) {
        nextState = State.MetaAttribCharset;
    }
    else if (folded === exports.STRINGS.CONTENT[1]) {
        nextState = State.MetaAttribContent;
    }
    else {
        // Neither `ch…` nor `co…`: an attribute we do not care about.
        this.state = State.AnyAttribName;
        this.stateAnyAttribName(c);
        return;
    }
    // Two characters are consumed, continue matching at index 2.
    this.sectionIndex = 2;
    this.state = nextState;
};
/** Matches the remainder of a `charset` attribute name. */
Sniffer.prototype.stateMetaAttribCharset = function (c) {
    var expectedName = exports.STRINGS.CHARSET;
    this.handleMetaAttrib(c, expectedName, AttribType.Charset);
};
/** Matches the remainder of a `content` attribute name. */
Sniffer.prototype.stateMetaAttribContent = function (c) {
    var expectedName = exports.STRINGS.CONTENT;
    this.handleMetaAttrib(c, expectedName, AttribType.Content);
};
Sniffer.prototype.stateMetaAttribAfterName = function (c) {
if (SPACE_CHARACTERS.has(c) || c === Chars.EQUALS) {
this.state = State.AfterAttributeName;
this.stateAfterAttributeName(c);
}
else {
this.state = State.AnyAttribName;
this.stateAnyAttribName(c);
}
};
Sniffer.prototype.stateAnyAttribName = function (c) {
if (SPACE_CHARACTERS.has(c)) {
this.attribType = AttribType.None;
this.state = State.AfterAttributeName;
}
else if (c === Chars.SLASH || c === Chars.GT) {
this.state = State.BeforeTag;
}
else if (c === Chars.EQUALS) {
this.state = State.BeforeAttributeValue;
}
};
Sniffer.prototype.stateAfterAttributeName = function (c) {
if (SPACE_CHARACTERS.has(c))
return;
if (c === Chars.EQUALS) {
this.state = State.BeforeAttributeValue;
}
else {
this.state = State.BeforeAttribute;
this.stateBeforeAttribute(c);
}
};
Sniffer.prototype.stateBeforeAttributeValue = function (c) {
if (SPACE_CHARACTERS.has(c))
return;
this.attributeValue.length = 0;
this.sectionIndex = 0;
if (isQuote(c)) {
this.quoteCharacter = c;
this.state =
this.attribType === AttribType.Content
? State.MetaContentValueQuotedBeforeEncoding
: this.attribType === AttribType.HttpEquiv
? State.MetaAttribHttpEquivValue
: State.AttributeValueQuoted;
}
else if (this.attribType === AttribType.Content) {
this.state = State.MetaContentValueUnquotedBeforeEncoding;
this.stateMetaContentValueUnquotedBeforeEncoding(c);
}
else if (this.attribType === AttribType.HttpEquiv) {
// We use `quoteCharacter = 0` to signify that the value is unquoted.
this.quoteCharacter = 0;
this.sectionIndex = 0;
this.state = State.MetaAttribHttpEquivValue;
this.stateMetaAttribHttpEquivValue(c);
}
else {
this.state = State.AttributeValueUnquoted;
this.stateAttributeValueUnquoted(c);
}
};
/**
 * Reads the value of an `http-equiv` attribute. The value has to be
 * `content-type` (ignoring case) for the pragma to count.
 *
 * If a `content` attribute already supplied an encoding (`needsPragma`),
 * that encoding is reported as soon as the pragma is confirmed.
 */
Sniffer.prototype.stateMetaAttribHttpEquivValue = function (c) {
    var unquoted = this.quoteCharacter === 0;
    if (this.sectionIndex !== exports.STRINGS.CONTENT_TYPE.length) {
        if (this.advanceSectionIC(exports.STRINGS.CONTENT_TYPE, c)) {
            // Still matching `content-type`.
            return;
        }
    }
    else {
        var valueEnded = unquoted
            ? END_OF_UNQUOTED_ATTRIBUTE_VALUE.has(c)
            : c === this.quoteCharacter;
        if (valueEnded) {
            if (this.needsPragma !== null) {
                this.setResult(this.needsPragma, ResultType.META_TAG);
            }
            else if (this.gotPragma === null) {
                this.gotPragma = true;
            }
            this.state = State.BeforeAttribute;
            return;
        }
    }
    // Any other value: skip the rest of it like a regular attribute value.
    this.gotPragma = false;
    if (unquoted) {
        this.state = State.AttributeValueUnquoted;
        this.stateAttributeValueUnquoted(c);
    }
    else {
        this.state = State.AttributeValueQuoted;
        this.stateAttributeValueQuoted(c);
    }
};
Sniffer.prototype.handleMetaContentValue = function () {
if (this.attributeValue.length === 0)
return;
var encoding = String.fromCharCode.apply(String, this.attributeValue);
if (this.gotPragma) {
this.setResult(encoding, ResultType.META_TAG);
}
else if (this.needsPragma === null) {
// Don't override a previous result.
this.needsPragma = encoding;
}
this.attributeValue.length = 0;
};
Sniffer.prototype.handleAttributeValue = function () {
if (this.attribType === AttribType.Charset) {
this.setResult(String.fromCharCode.apply(String, this.attributeValue), ResultType.META_TAG);
}
};
Sniffer.prototype.stateAttributeValueUnquoted = function (c) {
if (SPACE_CHARACTERS.has(c)) {
this.handleAttributeValue();
this.state = State.BeforeAttribute;
}
else if (c === Chars.SLASH || c === Chars.GT) {
this.handleAttributeValue();
this.state = State.BeforeTag;
}
else if (this.attribType === AttribType.Charset) {
this.attributeValue.push(c | 0x20);
}
};
/**
 * Searches the value of a `content` attribute for the word `charset`,
 * ignoring case, one byte at a time.
 *
 * @param c The current byte.
 * @returns `true` once the last character of `charset` has been matched.
 */
Sniffer.prototype.findMetaContentEncoding = function (c) {
    if (this.advanceSectionIC(exports.STRINGS.CHARSET, c)) {
        return this.sectionIndex === exports.STRINGS.CHARSET.length;
    }
    /*
     * The mismatch reset the match position. If we encountered another `c`,
     * assume we started over. The byte is case-folded the same way the
     * match itself is, so an uppercase `C` restarts the match as well.
     */
    this.sectionIndex = Number((c | 0x20) === exports.STRINGS.CHARSET[0]);
    return false;
};
/**
 * Scans an unquoted `content` value for `charset`, then waits for the `=`
 * that has to follow it.
 */
Sniffer.prototype.stateMetaContentValueUnquotedBeforeEncoding = function (c) {
    if (END_OF_UNQUOTED_ATTRIBUTE_VALUE.has(c)) {
        // The value is over before an encoding was found.
        this.stateAttributeValueUnquoted(c);
        return;
    }
    if (this.sectionIndex !== exports.STRINGS.CHARSET.length) {
        this.findMetaContentEncoding(c);
        return;
    }
    if (c === Chars.EQUALS) {
        this.state = State.MetaContentValueUnquotedBeforeValue;
    }
};
Sniffer.prototype.stateMetaContentValueUnquotedBeforeValue = function (c) {
if (isQuote(c)) {
this.quoteCharacter = c;
this.state = State.MetaContentValueUnquotedValueQuoted;
}
else if (END_OF_UNQUOTED_ATTRIBUTE_VALUE.has(c)) {
// Can't have spaces here, as it would no longer be part of the attribute value.
this.stateAttributeValueUnquoted(c);
}
else {
this.state = State.MetaContentValueUnquotedValueUnquoted;
this.stateMetaContentValueUnquotedValueUnquoted(c);
}
};
Sniffer.prototype.stateMetaContentValueUnquotedValueQuoted = function (c) {
if (END_OF_UNQUOTED_ATTRIBUTE_VALUE.has(c)) {
// Quotes weren't matched, so we're done.
this.stateAttributeValueUnquoted(c);
}
else if (c === this.quoteCharacter) {
this.handleMetaContentValue();
this.state = State.AttributeValueUnquoted;
}
else {
this.attributeValue.push(c | 0x20);
}
};
Sniffer.prototype.stateMetaContentValueUnquotedValueUnquoted = function (c) {
if (END_OF_UNQUOTED_ATTRIBUTE_VALUE.has(c) || c === Chars.SEMICOLON) {
this.handleMetaContentValue();
this.state = State.AttributeValueUnquoted;
this.stateAttributeValueUnquoted(c);
}
else {
this.attributeValue.push(c | 0x20);
}
};
Sniffer.prototype.stateMetaContentValueQuotedValueUnquoted = function (c) {
if (isQuote(c) || SPACE_CHARACTERS.has(c) || c === Chars.SEMICOLON) {
this.handleMetaContentValue();
// We are done with the value, but might not be at the end of the attribute
this.state = State.AttributeValueQuoted;
this.stateAttributeValueQuoted(c);
}
else {
this.attributeValue.push(c | 0x20);
}
};
Sniffer.prototype.stateMetaContentValueQuotedValueQuoted = function (c) {
if (isQuote(c)) {
// We have reached the end of our value.
if (c !== this.quoteCharacter) {
// Only handle the value if inner quotes were matched.
this.handleMetaContentValue();
}
this.state = State.AttributeValueQuoted;
this.stateAttributeValueQuoted(c);
}
else {
this.attributeValue.push(c | 0x20);
}
};
Sniffer.prototype.stateMetaContentValueQuotedBeforeEncoding = function (c) {
if (c === this.quoteCharacter) {
this.stateAttributeValueQuoted(c);
}
else if (this.findMetaContentEncoding(c)) {
this.state = State.MetaContentValueQuotedAfterEncoding;
}
};
Sniffer.prototype.stateMetaContentValueQuotedAfterEncoding = function (c) {
if (c === Chars.EQUALS) {
this.state = State.MetaContentValueQuotedBeforeValue;
}
else if (!SPACE_CHARACTERS.has(c)) {
// Look for the next encoding
this.state = State.MetaContentValueQuotedBeforeEncoding;
this.stateMetaContentValueQuotedBeforeEncoding(c);
}
};
Sniffer.prototype.stateMetaContentValueQuotedBeforeValue = function (c) {
if (c === this.quoteCharacter) {
this.stateAttributeValueQuoted(c);
}
else if (isQuote(c)) {
this.state = State.MetaContentValueQuotedValueQuoted;
}
else if (!SPACE_CHARACTERS.has(c)) {
this.state = State.MetaContentValueQuotedValueUnquoted;
this.stateMetaContentValueQuotedValueUnquoted(c);
}
};
Sniffer.prototype.stateAttributeValueQuoted = function (c) {
if (c === this.quoteCharacter) {
this.handleAttributeValue();
this.state = State.BeforeAttribute;
}
else if (this.attribType === AttribType.Charset) {
this.attributeValue.push(c | 0x20);
}
};
/** Reads `STRINGS.XML_DECLARATION`; any deviation makes this a weird tag. */
Sniffer.prototype.stateXMLDeclaration = function (c) {
    var declaration = exports.STRINGS.XML_DECLARATION;
    if (!this.advanceSection(declaration, c)) {
        this.state = State.WeirdTag;
        return;
    }
    if (this.sectionIndex === declaration.length) {
        // The whole prefix matched; look for `encoding` next.
        this.sectionIndex = 0;
        this.state = State.XMLDeclarationBeforeEncoding;
    }
};
/** Searches the XML declaration for the word `encoding` (case-sensitive). */
Sniffer.prototype.stateXMLDeclarationBeforeEncoding = function (c) {
    var keyword = exports.STRINGS.ENCODING;
    if (this.advanceSection(keyword, c)) {
        if (this.sectionIndex === keyword.length) {
            this.state = State.XMLDeclarationAfterEncoding;
        }
        return;
    }
    if (c === Chars.GT) {
        // The declaration ended without an encoding.
        this.state = State.BeforeTag;
        return;
    }
    // If we encountered another `c`, assume we started over.
    this.sectionIndex = Number(c === keyword[0]);
};
Sniffer.prototype.stateXMLDeclarationAfterEncoding = function (c) {
if (c === Chars.EQUALS) {
this.state = State.XMLDeclarationBeforeValue;
}
else if (c > Chars.SPACE) {
this.state = State.WeirdTag;
this.stateWeirdTag(c);
}
};
Sniffer.prototype.stateXMLDeclarationBeforeValue = function (c) {
if (isQuote(c)) {
this.attributeValue.length = 0;
this.state = State.XMLDeclarationValue;
}
else if (c > Chars.SPACE) {
this.state = State.WeirdTag;
this.stateWeirdTag(c);
}
};
Sniffer.prototype.stateXMLDeclarationValue = function (c) {
if (isQuote(c)) {
this.setResult(String.fromCharCode.apply(String, this.attributeValue), ResultType.XML_ENCODING);
this.state = State.WeirdTag;
}
else if (c === Chars.GT) {
this.state = State.BeforeTag;
}
else if (c <= Chars.SPACE) {
this.state = State.WeirdTag;
}
else {
this.attributeValue.push(c | 0x20);
}
};
Sniffer.prototype.write = function (buffer) {
var index = 0;
for (; index < buffer.length && this.offset + index < this.maxBytes; index++) {
var c = buffer[index];
switch (this.state) {
case State.Begin: {
this.stateBegin(c);
break;
}
case State.BOM16BE: {
this.stateBOM16BE(c);
break;
}
case State.BOM16LE: {
this.stateBOM16LE(c);
break;
}
case State.BOM8: {
this.stateBOM8(c);
break;
}
case State.UTF16LE_XML_PREFIX: {
this.stateUTF16LE_XML_PREFIX(c);
break;
}
case State.BeginLT: {
this.stateBeginLT(c);
break;
}
case State.UTF16BE_XML_PREFIX: {
this.stateUTF16BE_XML_PREFIX(c);
break;
}
case State.BeforeTag: {
// Optimization: Skip all characters until we find a `<`
var idx = buffer.indexOf(Chars.LT, index);
if (idx < 0) {
// We are done with this buffer. Stay in the state and try on the next one.
index = buffer.length;
}
else {
index = idx;
this.stateBeforeTag(Chars.LT);
}
break;
}
case State.BeforeTagName: {
this.stateBeforeTagName(c);
break;
}
case State.BeforeCloseTagName: {
this.stateBeforeCloseTagName(c);
break;
}
case State.CommentStart: {
this.stateCommentStart(c);
break;
}
case State.CommentEnd: {
this.stateCommentEnd(c);
break;
}
case State.TagNameMeta: {
this.stateTagNameMeta(c);
break;
}
case State.TagNameOther: {
this.stateTagNameOther(c);
break;
}
case State.XMLDeclaration: {
this.stateXMLDeclaration(c);
break;
}
case State.XMLDeclarationBeforeEncoding: {
this.stateXMLDeclarationBeforeEncoding(c);
break;
}
case State.XMLDeclarationAfterEncoding: {
this.stateXMLDeclarationAfterEncoding(c);
break;
}
case State.XMLDeclarationBeforeValue: {
this.stateXMLDeclarationBeforeValue(c);
break;
}
case State.XMLDeclarationValue: {
this.stateXMLDeclarationValue(c);
break;
}
case State.WeirdTag: {
this.stateWeirdTag(c);
break;
}
case State.BeforeAttribute: {
this.stateBeforeAttribute(c);
break;
}
case State.MetaAttribHttpEquiv: {
this.stateMetaAttribHttpEquiv(c);
break;
}
case State.MetaAttribHttpEquivValue: {
this.stateMetaAttribHttpEquivValue(c);
break;
}
case State.MetaAttribC: {
this.stateMetaAttribC(c);
break;
}
case State.MetaAttribContent: {
this.stateMetaAttribContent(c);
break;
}
case State.MetaAttribCharset: {
this.stateMetaAttribCharset(c);
break;
}
case State.MetaAttribAfterName: {
this.stateMetaAttribAfterName(c);
break;
}
case State.MetaContentValueQuotedBeforeEncoding: {
this.stateMetaContentValueQuotedBeforeEncoding(c);
break;
}
case State.MetaContentValueQuotedAfterEncoding: {
this.stateMetaContentValueQuotedAfterEncoding(c);
break;
}
case State.MetaContentValueQuotedBeforeValue: {
this.stateMetaContentValueQuotedBeforeValue(c);
break;
}
case State.MetaContentValueQuotedValueQuoted: {
this.stateMetaContentValueQuotedValueQuoted(c);
break;
}
case State.MetaContentValueQuotedValueUnquoted: {
this.stateMetaContentValueQuotedValueUnquoted(c);
break;
}
case State.MetaContentValueUnquotedBeforeEncoding: {
this.stateMetaContentValueUnquotedBeforeEncoding(c);
break;
}
case State.MetaContentValueUnquotedBeforeValue: {
this.stateMetaContentValueUnquotedBeforeValue(c);
break;
}
case State.MetaContentValueUnquotedValueQuoted: {
this.stateMetaContentValueUnquotedValueQuoted(c);
break;
}
case State.MetaContentValueUnquotedValueUnquoted: {
this.stateMetaContentValueUnquotedValueUnquoted(c);
break;
}
case State.AnyAttribName: {
this.stateAnyAttribName(c);
break;
}
case State.AfterAttributeName: {
this.stateAfterAttributeName(c);
break;
}
case State.BeforeAttributeValue: {
this.stateBeforeAttributeValue(c);
break;
}
case State.AttributeValueQuoted: {
this.stateAttributeValueQuoted(c);
break;
}
default: {
// (State.AttributeValueUnquoted)
this.stateAttributeValueUnquoted(c);
}
}
}
this.offset += index;
};
return Sniffer;
}());
exports.Sniffer = Sniffer;
/** Get the encoding for the passed buffer. */
function getEncoding(buffer, options) {
var sniffer = new Sniffer(options);
sniffer.write(buffer);
return sniffer.encoding;
}
exports.getEncoding = getEncoding;
//# sourceMappingURL=sniffer.js.map

File diff suppressed because one or more lines are too long

33
book/node_modules/encoding-sniffer/dist/esm/index.d.ts generated vendored Normal file
View File

@@ -0,0 +1,33 @@
/// <reference types="node" resolution-mode="require"/>
/// <reference types="node" resolution-mode="require"/>
import { Transform, type TransformCallback } from "node:stream";
import type { SnifferOptions } from "./sniffer.js";
/**
 * Sniff the encoding of a buffer, then decode it with that encoding.
 *
 * @param buffer Buffer to be decoded
 * @param options Options for the sniffer
 * @returns The decoded contents of the buffer, as a string
 */
export declare function decodeBuffer(buffer: Buffer, options?: SnifferOptions): string;
/**
 * Decodes a stream of buffers into a stream of strings.
 *
 * Passes the leading bytes (up to `options.maxBytes`, 1024 by default) to
 * the sniffer. Once that many bytes have been read, or the input ends, it
 * passes all data to iconv-lite's stream and outputs the results.
 */
export declare class DecodeStream extends Transform {
/** Sniffer that is fed the leading chunks of the input. */
private readonly sniffer;
/** Chunks held back until the iconv decode stream is created. */
private readonly buffers;
/**
 * The iconv decode stream. It is created once at least `maxBytes` bytes
 * have been read, or when the stream is flushed.
 */
private iconv;
/** Number of bytes to read before decoding starts. */
private readonly maxBytes;
/** Number of bytes received while sniffing. */
private readBytes;
/** @param options Options for the sniffer; `maxBytes` also controls how much input is held back. */
constructor(options?: SnifferOptions);
_transform(chunk: Uint8Array, _encoding: string, callback: TransformCallback): void;
/** Returns the iconv decode stream, creating it (and replaying held-back chunks) on first use. */
private getIconvStream;
_flush(callback: TransformCallback): void;
}
export { type SnifferOptions, getEncoding } from "./sniffer.js";
//# sourceMappingURL=index.d.ts.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;AAAA,OAAO,EAAE,SAAS,EAAE,KAAK,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAEhE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAGnD;;;;;;GAMG;AACH,wBAAgB,YAAY,CACxB,MAAM,EAAE,MAAM,EACd,OAAO,GAAE,cAAmB,GAC7B,MAAM,CAER;AAED;;;;;;GAMG;AACH,qBAAa,YAAa,SAAQ,SAAS;IACvC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAU;IAClC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAoB;IAC5C,8FAA8F;IAC9F,OAAO,CAAC,KAAK,CAAuC;IACpD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC;IAC1B,OAAO,CAAC,SAAS,CAAK;gBAEV,OAAO,CAAC,EAAE,cAAc;IAM3B,UAAU,CACf,KAAK,EAAE,UAAU,EACjB,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,iBAAiB,GAC5B,IAAI;IAeP,OAAO,CAAC,cAAc;IAmBb,MAAM,CAAC,QAAQ,EAAE,iBAAiB,GAAG,IAAI;CAGrD;AAED,OAAO,EAAE,KAAK,cAAc,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC"}

85
book/node_modules/encoding-sniffer/dist/esm/index.js generated vendored Normal file
View File

@@ -0,0 +1,85 @@
/*
 * Class-inheritance helper for the ES5-style class below (it has the shape
 * of the helper the TypeScript compiler emits — presumably generated, do
 * not edit by hand). Reuses an existing `this.__extends` if there is one.
 *
 * `__extends(d, b)` makes constructor `d` inherit from `b`: static members
 * via the prototype chain of the constructor (or by copying own properties
 * where that is unavailable), instance members via `d.prototype`.
 */
var __extends = (this && this.__extends) || (function () {
// Picks the best available strategy on first use, then replaces itself with it.
var extendStatics = function (d, b) {
extendStatics = Object.setPrototypeOf ||
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };
return extendStatics(d, b);
};
return function (d, b) {
// Only functions and `null` can be extended.
if (typeof b !== "function" && b !== null)
throw new TypeError("Class extends value " + String(b) + " is not a constructor or null");
extendStatics(d, b);
// Intermediate constructor, so that `b` itself is not invoked to build the prototype.
function __() { this.constructor = d; }
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
};
})();
import { Transform } from "node:stream";
import iconv from "iconv-lite";
import { Sniffer, getEncoding } from "./sniffer.js";
/**
* Sniff the encoding of a buffer, then decode it.
*
* @param buffer Buffer to be decoded
* @param options Options for the sniffer
* @returns The decoded buffer
*/
export function decodeBuffer(buffer, options) {
if (options === void 0) { options = {}; }
return iconv.decode(buffer, getEncoding(buffer, options));
}
/**
 * Decodes a stream of buffers into a stream of strings.
 *
 * Passes the leading bytes (up to `options.maxBytes`, 1024 by default) to
 * the sniffer. Once that many bytes have been read, or the input ends, it
 * passes all data to iconv-lite's stream and outputs the results.
 */
var DecodeStream = /** @class */ (function (_super) {
    __extends(DecodeStream, _super);
    function DecodeStream(options) {
        var _this = _super.call(this, { decodeStrings: false, encoding: "utf-8" }) || this;
        /** Chunks held back until the encoding is known. */
        _this.buffers = [];
        /** The iconv decode stream; `null` until it has been created. */
        _this.iconv = null;
        _this.readBytes = 0;
        _this.sniffer = new Sniffer(options);
        var requestedMaxBytes = options === null || options === void 0 ? void 0 : options.maxBytes;
        _this.maxBytes =
            requestedMaxBytes !== null && requestedMaxBytes !== void 0
                ? requestedMaxBytes
                : 1024;
        return _this;
    }
    DecodeStream.prototype._transform = function (chunk, _encoding, callback) {
        var stillSniffing = this.readBytes < this.maxBytes;
        if (stillSniffing) {
            this.sniffer.write(chunk);
            this.readBytes += chunk.length;
            if (this.readBytes < this.maxBytes) {
                // Not enough data yet: hold the chunk back.
                this.buffers.push(chunk);
                callback();
                return;
            }
        }
        var decoder = this.getIconvStream();
        decoder.write(chunk, callback);
    };
    DecodeStream.prototype.getIconvStream = function () {
        var _this = this;
        if (this.iconv) {
            return this.iconv;
        }
        // First use: commit to whatever encoding the sniffer has settled on.
        var decoder = iconv.decodeStream(this.sniffer.encoding);
        decoder.on("data", function (text) {
            return _this.push(text, "utf-8");
        });
        decoder.on("end", function () {
            return _this.push(null);
        });
        this.iconv = decoder;
        // Replay everything that was held back while sniffing.
        var pending = this.buffers;
        for (var position = 0; position < pending.length; position++) {
            decoder.write(pending[position]);
        }
        this.buffers.length = 0;
        return decoder;
    };
    DecodeStream.prototype._flush = function (callback) {
        var decoder = this.getIconvStream();
        decoder.end(callback);
    };
    return DecodeStream;
}(Transform));
export { DecodeStream };
export { getEncoding } from "./sniffer.js";
//# sourceMappingURL=index.js.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,OAAO,EAAE,SAAS,EAA0B,MAAM,aAAa,CAAC;AAChE,OAAO,KAAK,MAAM,YAAY,CAAC;AAE/B,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAEpD;;;;;;GAMG;AACH,MAAM,UAAU,YAAY,CACxB,MAAc,EACd,OAA4B;IAA5B,wBAAA,EAAA,YAA4B;IAE5B,OAAO,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,WAAW,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;AAC9D,CAAC;AAED;;;;;;GAMG;AACH;IAAkC,gCAAS;IAQvC,sBAAY,OAAwB;;QAChC,YAAA,MAAK,YAAC,EAAE,aAAa,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,SAAC;QAPtC,aAAO,GAAiB,EAAE,CAAC;QAC5C,8FAA8F;QACtF,WAAK,GAAkC,IAAI,CAAC;QAE5C,eAAS,GAAG,CAAC,CAAC;QAIlB,KAAI,CAAC,OAAO,GAAG,IAAI,OAAO,CAAC,OAAO,CAAC,CAAC;QACpC,KAAI,CAAC,QAAQ,GAAG,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,QAAQ,mCAAI,IAAI,CAAC;;IAC9C,CAAC;IAEQ,iCAAU,GAAnB,UACI,KAAiB,EACjB,SAAiB,EACjB,QAA2B;QAE3B,IAAI,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;YACjC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YAC1B,IAAI,CAAC,SAAS,IAAI,KAAK,CAAC,MAAM,CAAC;YAE/B,IAAI,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACjC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACzB,QAAQ,EAAE,CAAC;gBACX,OAAO;YACX,CAAC;QACL,CAAC;QAED,IAAI,CAAC,cAAc,EAAE,CAAC,KAAK,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;IACjD,CAAC;IAEO,qCAAc,GAAtB;QAAA,iBAiBC;QAhBG,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACb,OAAO,IAAI,CAAC,KAAK,CAAC;QACtB,CAAC;QAED,IAAM,MAAM,GAAG,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACzD,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,UAAC,KAAa,IAAK,OAAA,KAAI,CAAC,IAAI,CAAC,KAAK,EAAE,OAAO,CAAC,EAAzB,CAAyB,CAAC,CAAC;QAChE,MAAM,CAAC,EAAE,CAAC,KAAK,EAAE,cAAM,OAAA,KAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAf,CAAe,CAAC,CAAC;QAExC,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC;QAEpB,KAAqB,UAAY,EAAZ,KAAA,IAAI,CAAC,OAAO,EAAZ,cAAY,EAAZ,IAAY,EAAE,CAAC;YAA/B,IAAM,MAAM,SAAA;YACb,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACzB,CAAC;QACD,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;QAExB,OAAO,MAAM,CAAC;IAClB,CAAC;IAEQ,6BAAM,GAAf,UAAgB,QAA2B;QACvC,IAAI,CAAC,cAAc,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IACxC,CAAC;IACL,mBAAC;AAAD,CAAC,AAvDD
,CAAkC,SAAS,GAuD1C;;AAED,OAAO,EAAuB,WAAW,EAAE,MAAM,cAAc,CAAC"}

View File

@@ -0,0 +1,3 @@
{
"type": "module"
}

View File

@@ -0,0 +1,148 @@
/** Describes where a reported encoding came from. */
export declare enum ResultType {
/** Byte order mark */
BOM = 0,
/** User- or transport layer-defined */
PASSED = 1,
/** XML prefixes */
XML_PREFIX = 2,
/** Meta tag */
META_TAG = 3,
/** XML encoding */
XML_ENCODING = 4,
/** Default */
DEFAULT = 5
}
/** Byte sequences the sniffer matches its input against. */
export declare const STRINGS: {
UTF8_BOM: Uint8Array;
UTF16LE_BOM: Uint8Array;
UTF16BE_BOM: Uint8Array;
UTF16LE_XML_PREFIX: Uint8Array;
UTF16BE_XML_PREFIX: Uint8Array;
XML_DECLARATION: Uint8Array;
ENCODING: Uint8Array;
META: Uint8Array;
HTTP_EQUIV: Uint8Array;
CONTENT: Uint8Array;
CONTENT_TYPE: Uint8Array;
CHARSET: Uint8Array;
COMMENT_START: Uint8Array;
COMMENT_END: Uint8Array;
};
/** Options accepted by the sniffer and by the decoding helpers built on top of it. */
export interface SnifferOptions {
/**
 * The maximum number of bytes to sniff.
 *
 * @default 1024
 */
maxBytes?: number;
/**
 * The encoding specified by the user.
 */
userEncoding?: string;
/**
 * The encoding specified by the transport layer.
 */
transportLayerEncodingLabel?: string;
/**
 * The default encoding to use.
 *
 * @default "windows-1252"
 */
defaultEncoding?: string;
}
/**
 * Byte-at-a-time state machine that looks for encoding declarations.
 *
 * Feed it data with `write`; the current answer is available as `encoding`,
 * and `resultType` tells where that answer came from.
 */
export declare class Sniffer {
/** The maximum number of bytes to sniff. */
private readonly maxBytes;
/** The offset of the previous buffers. */
private offset;
/** The current state of the state machine. */
private state;
/** Position inside the byte sequence that is currently being matched. */
private sectionIndex;
/** The recognised meta attribute that is currently being read, if any. */
private attribType;
/**
 * Indicates if the `http-equiv` is `content-type`.
 *
 * Initially `null`, a boolean when a value is found.
 */
private gotPragma;
/** Encoding found in a `content` attribute that still waits for the `http-equiv` pragma. */
private needsPragma;
private inMetaTag;
/** The encoding determined so far. */
encoding: string;
/** Where `encoding` came from. */
resultType: ResultType;
private setResult;
constructor({ maxBytes, userEncoding, transportLayerEncodingLabel, defaultEncoding, }?: SnifferOptions);
private stateBegin;
private stateBeginLT;
private stateUTF16BE_XML_PREFIX;
private stateUTF16LE_XML_PREFIX;
private stateBOM16LE;
private stateBOM16BE;
private stateBOM8;
private stateBeforeTag;
/**
 * We have seen a `<`, and now have to figure out what to do.
 *
 * Options:
 * - `<meta`
 * - Any other tag
 * - A closing tag
 * - `<!--`
 * - An XML declaration
 *
 */
private stateBeforeTagName;
private stateBeforeCloseTagName;
private stateCommentStart;
private stateCommentEnd;
/**
 * Any section starting with `<!`, `<?`, `</`, without being a closing tag or comment.
 */
private stateWeirdTag;
/**
 * Advances the section, ignoring upper/lower case.
 *
 * Make sure the section has left-over characters before calling.
 *
 * @returns `false` if we did not match the section.
 */
private advanceSectionIC;
/**
 * Advances the section.
 *
 * Make sure the section has left-over characters before calling.
 *
 * @returns `false` if we did not match the section.
 */
private advanceSection;
private stateTagNameMeta;
private stateTagNameOther;
private stateBeforeAttribute;
private handleMetaAttrib;
private stateMetaAttribHttpEquiv;
private stateMetaAttribC;
private stateMetaAttribCharset;
private stateMetaAttribContent;
private stateMetaAttribAfterName;
private stateAnyAttribName;
private stateAfterAttributeName;
/** The quote that opened the current attribute value; `0` signifies an unquoted `http-equiv` value. */
private quoteCharacter;
/** Bytes of the encoding label that is currently being collected. */
private readonly attributeValue;
private stateBeforeAttributeValue;
private stateMetaAttribHttpEquivValue;
private handleMetaContentValue;
private handleAttributeValue;
private stateAttributeValueUnquoted;
private findMetaContentEncoding;
private stateMetaContentValueUnquotedBeforeEncoding;
private stateMetaContentValueUnquotedBeforeValue;
private stateMetaContentValueUnquotedValueQuoted;
private stateMetaContentValueUnquotedValueUnquoted;
private stateMetaContentValueQuotedValueUnquoted;
private stateMetaContentValueQuotedValueQuoted;
private stateMetaContentValueQuotedBeforeEncoding;
private stateMetaContentValueQuotedAfterEncoding;
private stateMetaContentValueQuotedBeforeValue;
private stateAttributeValueQuoted;
private stateXMLDeclaration;
private stateXMLDeclarationBeforeEncoding;
private stateXMLDeclarationAfterEncoding;
private stateXMLDeclarationBeforeValue;
private stateXMLDeclarationValue;
/** Feeds a chunk of bytes to the sniffer; at most `maxBytes` bytes are inspected in total. */
write(buffer: Uint8Array): void;
}
/**
 * Get the encoding for the passed buffer.
 *
 * @param buffer The bytes to sniff
 * @param options Options for the sniffer
 * @returns The encoding reported by the sniffer
 */
export declare function getEncoding(buffer: Uint8Array, options?: SnifferOptions): string;
//# sourceMappingURL=sniffer.d.ts.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"sniffer.d.ts","sourceRoot":"","sources":["../../src/sniffer.ts"],"names":[],"mappings":"AAuEA,oBAAY,UAAU;IAElB,GAAG,IAAI;IAEP,MAAM,IAAI;IAEV,UAAU,IAAI;IAEd,QAAQ,IAAI;IAEZ,YAAY,IAAI;IAEhB,OAAO,IAAI;CACd;AAgDD,eAAO,MAAM,OAAO;;;;;;;;;;;;;;;CAenB,CAAC;AAaF,MAAM,WAAW,cAAc;IAC3B;;;;OAIG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;OAEG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB;;OAEG;IACH,2BAA2B,CAAC,EAAE,MAAM,CAAC;IACrC;;;;OAIG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,qBAAa,OAAO;IAChB,4CAA4C;IAC5C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,0CAA0C;IAC1C,OAAO,CAAC,MAAM,CAAK;IAEnB,OAAO,CAAC,KAAK,CAAe;IAC5B,OAAO,CAAC,YAAY,CAAK;IACzB,OAAO,CAAC,UAAU,CAAmB;IACrC;;;;OAIG;IACH,OAAO,CAAC,SAAS,CAAwB;IACzC,OAAO,CAAC,WAAW,CAAuB;IAE1C,OAAO,CAAC,SAAS,CAAS;IAEnB,QAAQ,SAAkB;IAC1B,UAAU,aAAsB;IAEvC,OAAO,CAAC,SAAS;gBAsBL,EACR,QAAe,EACf,YAAY,EACZ,2BAA2B,EAC3B,eAAe,GAClB,GAAE,cAAmB;IAetB,OAAO,CAAC,UAAU;IAmClB,OAAO,CAAC,YAAY;IAapB,OAAO,CAAC,uBAAuB;IAa/B,OAAO,CAAC,uBAAuB;IAa/B,OAAO,CAAC,YAAY;IASpB,OAAO,CAAC,YAAY;IASpB,OAAO,CAAC,SAAS;IASjB,OAAO,CAAC,cAAc;IAOtB;;;;;;;;;;OAUG;IACH,OAAO,CAAC,kBAAkB;IAiC1B,OAAO,CAAC,uBAAuB;IAO/B,OAAO,CAAC,iBAAiB;IAazB,OAAO,CAAC,eAAe;IAcvB;;OAEG;IACH,OAAO,CAAC,aAAa;IAMrB;;;;;;OAMG;IACH,OAAO,CAAC,gBAAgB;IAIxB;;;;;;OAMG;IACH,OAAO,CAAC,cAAc;IAUtB,OAAO,CAAC,gBAAgB;IAkBxB,OAAO,CAAC,iBAAiB;IAQzB,OAAO,CAAC,oBAAoB;IAsB5B,OAAO,CAAC,gBAAgB;IAgBxB,OAAO,CAAC,wBAAwB;IAIhC,OAAO,CAAC,gBAAgB;IAcxB,OAAO,CAAC,sBAAsB;IAI9B,OAAO,CAAC,sBAAsB;IAI9B,OAAO,CAAC,wBAAwB;IAUhC,OAAO,CAAC,kBAAkB;IAW1B,OAAO,CAAC,uBAAuB;IAW/B,OAAO,CAAC,cAAc,CAAK;IAC3B,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAgB;IAE/C,OAAO,CAAC,yBAAyB;IA8BjC,OAAO,CAAC,6BAA6B;IA+BrC,OAAO,CAAC,sBAAsB;IAe9B,OAAO,CAAC,oBAAoB;IAS5B,OAAO,CAAC,2BAA2B;IAYnC,OAAO,CAAC,uBAAuB;IAY/B,OAAO,CAAC,2CAA2C;IAYnD,OAAO,CAAC,wCAAwC;IAahD,OAAO,CAAC,wCAAwC;IAYhD,OAAO,CAAC,0CAA0C;IAUlD,OAAO,CAAC,wCAAwC;IAWhD,OAAO,CAAC,sCAAsC;IAgB9C,OAAO,CAAC,yCAAyC;IAQjD,OAAO,CAAC,wCAAwC;IAUhD,OAAO,CAAC,sCAAsC;IAW9C,OAAO,CAAC,yBAAyB;IAUjC,OAAO,CAAC,mBAAmB;IAW3B,OAAO,CAAC,iCAAiC;IAazC,OA
AO,CAAC,gCAAgC;IASxC,OAAO,CAAC,8BAA8B;IAUtC,OAAO,CAAC,wBAAwB;IAgBzB,KAAK,CAAC,MAAM,EAAE,UAAU,GAAG,IAAI;CAoOzC;AAED,8CAA8C;AAC9C,wBAAgB,WAAW,CACvB,MAAM,EAAE,UAAU,EAClB,OAAO,CAAC,EAAE,cAAc,GACzB,MAAM,CAIR"}

988
book/node_modules/encoding-sniffer/dist/esm/sniffer.js generated vendored Normal file
View File

@@ -0,0 +1,988 @@
import { labelToName } from "whatwg-encoding";
// States of the sniffer's state machine, see
// https://html.spec.whatwg.org/multipage/syntax.html#prescan-a-byte-stream-to-determine-its-encoding
var State;
(function (State) {
// Before anything starts; can be any of BOM, UTF-16 XML declarations or meta tags
State[State["Begin"] = 0] = "Begin";
// Inside of a BOM
State[State["BOM16BE"] = 1] = "BOM16BE";
State[State["BOM16LE"] = 2] = "BOM16LE";
State[State["BOM8"] = 3] = "BOM8";
// XML prefix
State[State["UTF16LE_XML_PREFIX"] = 4] = "UTF16LE_XML_PREFIX";
State[State["BeginLT"] = 5] = "BeginLT";
State[State["UTF16BE_XML_PREFIX"] = 6] = "UTF16BE_XML_PREFIX";
// Waiting for opening `<`
State[State["BeforeTag"] = 7] = "BeforeTag";
// After the opening `<`
State[State["BeforeTagName"] = 8] = "BeforeTagName";
// After `</`
State[State["BeforeCloseTagName"] = 9] = "BeforeCloseTagName";
// Beginning of a comment
State[State["CommentStart"] = 10] = "CommentStart";
// End of a comment
State[State["CommentEnd"] = 11] = "CommentEnd";
// A tag name that could be `meta`
State[State["TagNameMeta"] = 12] = "TagNameMeta";
// A tag name that is not `meta`
State[State["TagNameOther"] = 13] = "TagNameOther";
// XML declaration
State[State["XMLDeclaration"] = 14] = "XMLDeclaration";
State[State["XMLDeclarationBeforeEncoding"] = 15] = "XMLDeclarationBeforeEncoding";
State[State["XMLDeclarationAfterEncoding"] = 16] = "XMLDeclarationAfterEncoding";
State[State["XMLDeclarationBeforeValue"] = 17] = "XMLDeclarationBeforeValue";
State[State["XMLDeclarationValue"] = 18] = "XMLDeclarationValue";
// Anything that looks like a tag, but doesn't fit in the above categories
State[State["WeirdTag"] = 19] = "WeirdTag";
// Before the name of an attribute
State[State["BeforeAttribute"] = 20] = "BeforeAttribute";
/*
 * Attributes in meta tag — we compare them to our set here, and back out
 * as soon as they do not match.
 * We care about three attribute names (http-equiv, content, charset) and
 * one attribute value (content-type, for http-equiv).
 */
State[State["MetaAttribHttpEquiv"] = 21] = "MetaAttribHttpEquiv";
// The value has to be `content-type`
State[State["MetaAttribHttpEquivValue"] = 22] = "MetaAttribHttpEquivValue";
// After a leading `c`, which can still become `charset` or `content`
State[State["MetaAttribC"] = 23] = "MetaAttribC";
State[State["MetaAttribContent"] = 24] = "MetaAttribContent";
State[State["MetaAttribCharset"] = 25] = "MetaAttribCharset";
// Waiting for whitespace
State[State["MetaAttribAfterName"] = 26] = "MetaAttribAfterName";
// Inside the value of a `content` attribute, for quoted and unquoted values
State[State["MetaContentValueQuotedBeforeEncoding"] = 27] = "MetaContentValueQuotedBeforeEncoding";
State[State["MetaContentValueQuotedAfterEncoding"] = 28] = "MetaContentValueQuotedAfterEncoding";
State[State["MetaContentValueQuotedBeforeValue"] = 29] = "MetaContentValueQuotedBeforeValue";
State[State["MetaContentValueQuotedValueQuoted"] = 30] = "MetaContentValueQuotedValueQuoted";
State[State["MetaContentValueQuotedValueUnquoted"] = 31] = "MetaContentValueQuotedValueUnquoted";
State[State["MetaContentValueUnquotedBeforeEncoding"] = 32] = "MetaContentValueUnquotedBeforeEncoding";
State[State["MetaContentValueUnquotedBeforeValue"] = 33] = "MetaContentValueUnquotedBeforeValue";
State[State["MetaContentValueUnquotedValueQuoted"] = 34] = "MetaContentValueUnquotedValueQuoted";
State[State["MetaContentValueUnquotedValueUnquoted"] = 35] = "MetaContentValueUnquotedValueUnquoted";
// Inside the name of an attribute we do not care about
State[State["AnyAttribName"] = 36] = "AnyAttribName";
// After the name of an attribute, before the equals sign
State[State["AfterAttributeName"] = 37] = "AfterAttributeName";
// After `=`
State[State["BeforeAttributeValue"] = 38] = "BeforeAttributeValue";
State[State["AttributeValueQuoted"] = 39] = "AttributeValueQuoted";
State[State["AttributeValueUnquoted"] = 40] = "AttributeValueUnquoted";
})(State || (State = {}));
/** Describes where a reported encoding came from. */
export var ResultType;
(function (ResultType) {
// Byte order mark
ResultType[ResultType["BOM"] = 0] = "BOM";
// User- or transport layer-defined
ResultType[ResultType["PASSED"] = 1] = "PASSED";
// XML prefixes
ResultType[ResultType["XML_PREFIX"] = 2] = "XML_PREFIX";
// Meta tag
ResultType[ResultType["META_TAG"] = 3] = "META_TAG";
// XML encoding
ResultType[ResultType["XML_ENCODING"] = 4] = "XML_ENCODING";
// Default
ResultType[ResultType["DEFAULT"] = 5] = "DEFAULT";
})(ResultType || (ResultType = {}));
/** The meta tag attribute whose value is being read. */
var AttribType;
(function (AttribType) {
// Not one of the attributes below
AttribType[AttribType["None"] = 0] = "None";
// `http-equiv`
AttribType[AttribType["HttpEquiv"] = 1] = "HttpEquiv";
// `content`
AttribType[AttribType["Content"] = 2] = "Content";
// `charset`
AttribType[AttribType["Charset"] = 3] = "Charset";
})(AttribType || (AttribType = {}));
/** Byte values of the ASCII characters the sniffer compares its input against. */
var Chars;
(function (Chars) {
Chars[Chars["NIL"] = 0] = "NIL";
Chars[Chars["TAB"] = 9] = "TAB";
Chars[Chars["LF"] = 10] = "LF";
Chars[Chars["CR"] = 13] = "CR";
Chars[Chars["SPACE"] = 32] = "SPACE";
Chars[Chars["EXCLAMATION"] = 33] = "EXCLAMATION";
Chars[Chars["DQUOTE"] = 34] = "DQUOTE";
Chars[Chars["SQUOTE"] = 39] = "SQUOTE";
Chars[Chars["DASH"] = 45] = "DASH";
Chars[Chars["SLASH"] = 47] = "SLASH";
Chars[Chars["SEMICOLON"] = 59] = "SEMICOLON";
Chars[Chars["LT"] = 60] = "LT";
Chars[Chars["EQUALS"] = 61] = "EQUALS";
Chars[Chars["GT"] = 62] = "GT";
Chars[Chars["QUESTION"] = 63] = "QUESTION";
Chars[Chars["UpperA"] = 65] = "UpperA";
Chars[Chars["UpperZ"] = 90] = "UpperZ";
Chars[Chars["LowerA"] = 97] = "LowerA";
Chars[Chars["LowerZ"] = 122] = "LowerZ";
})(Chars || (Chars = {}));
/**
 * Bytes treated as whitespace inside tags.
 *
 * Includes form feed (0x0C), which the HTML encoding prescan lists as
 * whitespace alongside TAB, LF, CR and SPACE.
 */
var SPACE_CHARACTERS = new Set([
    Chars.SPACE,
    Chars.LF,
    Chars.CR,
    Chars.TAB,
    0x0c, // Form feed
]);
/** Bytes that end an unquoted attribute value: any whitespace byte, or `>`. */
var END_OF_UNQUOTED_ATTRIBUTE_VALUE = new Set([
    Chars.SPACE,
    Chars.LF,
    Chars.CR,
    Chars.TAB,
    0x0c, // Form feed
    Chars.GT,
]);
/**
 * Converts a string into a `Uint8Array` holding one element per UTF-16 code
 * unit of `str` (the code unit value, as stored by a `Uint8Array`).
 */
function toUint8Array(str) {
    return Uint8Array.from(str.split(""), function (unit) {
        return unit.charCodeAt(0);
    });
}
/** Byte sequences the sniffer matches against, one byte per element. */
export var STRINGS = {
    // Byte order marks
    UTF8_BOM: Uint8Array.of(0xef, 0xbb, 0xbf),
    UTF16LE_BOM: Uint8Array.of(0xff, 0xfe),
    UTF16BE_BOM: Uint8Array.of(0xfe, 0xff),
    // The bytes of `<?x` with a zero byte after (LE) / before (BE) each one
    UTF16LE_XML_PREFIX: Uint8Array.of(0x3c, 0x0, 0x3f, 0x0, 0x78, 0x0),
    UTF16BE_XML_PREFIX: Uint8Array.of(0x0, 0x3c, 0x0, 0x3f, 0x0, 0x78),
    // XML declaration
    XML_DECLARATION: toUint8Array("<?xml"),
    ENCODING: toUint8Array("encoding"),
    // `<meta>` tag and attribute names / values
    META: toUint8Array("meta"),
    HTTP_EQUIV: toUint8Array("http-equiv"),
    CONTENT: toUint8Array("content"),
    CONTENT_TYPE: toUint8Array("content-type"),
    CHARSET: toUint8Array("charset"),
    // Comment delimiters
    COMMENT_START: toUint8Array("<!--"),
    COMMENT_END: toUint8Array("-->"),
};
/** Returns whether `c` lies in `A`-`Z` or `a`-`z`. */
function isAsciiAlpha(c) {
    var isUpper = c >= Chars.UpperA && c <= Chars.UpperZ;
    var isLower = c >= Chars.LowerA && c <= Chars.LowerZ;
    return isUpper || isLower;
}
/** Returns whether `c` is a double or single quote. */
function isQuote(c) {
    switch (c) {
        case Chars.DQUOTE:
        case Chars.SQUOTE: {
            return true;
        }
        default: {
            return false;
        }
    }
}
var Sniffer = /** @class */ (function () {
/**
 * Creates a sniffer.
 *
 * @param _a Optional settings object. `maxBytes` (default 1024) bounds how
 *     many bytes `write` looks at. `userEncoding` and
 *     `transportLayerEncodingLabel` are recorded, in that order, as
 *     `ResultType.PASSED` results; `defaultEncoding` is recorded as a
 *     `ResultType.DEFAULT` result.
 */
function Sniffer(_a) {
    // Read all options up front; a missing argument means "no options".
    var options = _a === void 0 ? {} : _a;
    var requestedMaxBytes = options.maxBytes;
    var userEncoding = options.userEncoding;
    var transportLayerEncodingLabel = options.transportLayerEncodingLabel;
    var defaultEncoding = options.defaultEncoding;
    /** The offset of the previous buffers. */
    this.offset = 0;
    this.state = State.Begin;
    this.sectionIndex = 0;
    this.attribType = AttribType.None;
    /**
     * Indicates if the `http-equiv` is `content-type`.
     *
     * Initially `null`, a boolean when a value is found.
     */
    this.gotPragma = null;
    this.needsPragma = null;
    this.inMetaTag = false;
    this.encoding = "windows-1252";
    this.resultType = ResultType.DEFAULT;
    this.quoteCharacter = 0;
    this.attributeValue = [];
    this.maxBytes = requestedMaxBytes === void 0 ? 1024 : requestedMaxBytes;
    // Encodings handed in by the caller, first the user's, then the transport layer's.
    var passedLabels = [userEncoding, transportLayerEncodingLabel];
    for (var i = 0; i < passedLabels.length; i++) {
        if (passedLabels[i]) {
            this.setResult(passedLabels[i], ResultType.PASSED);
        }
    }
    if (defaultEncoding) {
        this.setResult(defaultEncoding, ResultType.DEFAULT);
    }
}
/**
 * Stores `label` as the detected encoding, unless a result of equal or
 * smaller `ResultType` value is already present.
 *
 * Labels that `labelToName` does not resolve are ignored.
 *
 * @param label Encoding label to resolve.
 * @param type Where the label came from.
 */
Sniffer.prototype.setResult = function (label, type) {
    var mayReplace = this.resultType === ResultType.DEFAULT || this.resultType > type;
    if (!mayReplace) {
        return;
    }
    var encoding = labelToName(label);
    if (!encoding) {
        return;
    }
    var fromMeta = type === ResultType.META_TAG;
    if (fromMeta && encoding === "x-user-defined") {
        // Check if we are in a meta tag and the encoding is `x-user-defined`
        this.encoding = "windows-1252";
    }
    else if ((fromMeta || type === ResultType.XML_ENCODING) &&
        (encoding === "UTF-16LE" || encoding === "UTF-16BE")) {
        // Check if we are in a meta tag or xml declaration, and the encoding is UTF-16
        this.encoding = "UTF-8";
    }
    else {
        this.encoding = encoding;
    }
    this.resultType = type;
};
/**
 * Handles the very first byte: picks the BOM, XML-prefix or tag state that
 * matches it.
 */
Sniffer.prototype.stateBegin = function (c) {
    if (c === STRINGS.UTF16BE_BOM[0]) {
        this.state = State.BOM16BE;
    }
    else if (c === STRINGS.UTF16LE_BOM[0]) {
        this.state = State.BOM16LE;
    }
    else if (c === STRINGS.UTF8_BOM[0]) {
        // The first BOM byte has been matched already.
        this.sectionIndex = 1;
        this.state = State.BOM8;
    }
    else if (c === Chars.NIL) {
        // The first byte of the UTF-16BE XML prefix has been matched already.
        this.state = State.UTF16BE_XML_PREFIX;
        this.sectionIndex = 1;
    }
    else if (c === Chars.LT) {
        this.state = State.BeginLT;
    }
    else {
        this.state = State.BeforeTag;
    }
};
/**
 * Handles the byte after a `<` at the very start of the input: a zero byte
 * starts the UTF-16LE XML prefix, `?` starts an XML declaration, anything
 * else is treated as the start of a tag name.
 */
Sniffer.prototype.stateBeginLT = function (c) {
    switch (c) {
        case Chars.NIL: {
            this.state = State.UTF16LE_XML_PREFIX;
            // `<` and the zero byte have been matched.
            this.sectionIndex = 2;
            break;
        }
        case Chars.QUESTION: {
            this.state = State.XMLDeclaration;
            // `<?` has been matched.
            this.sectionIndex = 2;
            break;
        }
        default: {
            this.state = State.BeforeTagName;
            this.stateBeforeTagName(c);
        }
    }
};
/**
 * Matches the remaining bytes of `STRINGS.UTF16BE_XML_PREFIX`; a complete
 * match records `utf-16be`.
 */
Sniffer.prototype.stateUTF16BE_XML_PREFIX = function (c) {
    var prefix = STRINGS.UTF16BE_XML_PREFIX;
    if (!this.advanceSection(prefix, c)) {
        // Not the prefix after all; reconsume the byte as ordinary content.
        this.state = State.BeforeTag;
        this.stateBeforeTag(c);
        return;
    }
    if (this.sectionIndex === prefix.length) {
        // We have the whole prefix
        this.setResult("utf-16be", ResultType.XML_PREFIX);
    }
};
/**
 * Matches the remaining bytes of `STRINGS.UTF16LE_XML_PREFIX`; a complete
 * match records `utf-16le`.
 */
Sniffer.prototype.stateUTF16LE_XML_PREFIX = function (c) {
    var prefix = STRINGS.UTF16LE_XML_PREFIX;
    if (!this.advanceSection(prefix, c)) {
        // Not the prefix after all; reconsume the byte as ordinary content.
        this.state = State.BeforeTag;
        this.stateBeforeTag(c);
        return;
    }
    if (this.sectionIndex === prefix.length) {
        // We have the whole prefix
        this.setResult("utf-16le", ResultType.XML_PREFIX);
    }
};
/**
 * Checks the byte after 0xFF: the second UTF-16LE BOM byte records
 * `utf-16le`, anything else is reconsumed as ordinary content.
 */
Sniffer.prototype.stateBOM16LE = function (c) {
    if (c !== STRINGS.UTF16LE_BOM[1]) {
        this.state = State.BeforeTag;
        this.stateBeforeTag(c);
        return;
    }
    this.setResult("utf-16le", ResultType.BOM);
};
/**
 * Checks the byte after 0xFE: the second UTF-16BE BOM byte records
 * `utf-16be`, anything else is reconsumed as ordinary content.
 */
Sniffer.prototype.stateBOM16BE = function (c) {
    if (c !== STRINGS.UTF16BE_BOM[1]) {
        this.state = State.BeforeTag;
        this.stateBeforeTag(c);
        return;
    }
    this.setResult("utf-16be", ResultType.BOM);
};
/**
 * Matches the second and third byte of the UTF-8 BOM; a complete match
 * records `utf-8`.
 *
 * A byte that does not continue the BOM is reconsumed as ordinary content,
 * like in the UTF-16 BOM states. Without that fallback, input that merely
 * starts with 0xEF would leave the sniffer in this state for good and no tag
 * would ever be inspected.
 */
Sniffer.prototype.stateBOM8 = function (c) {
    if (this.advanceSection(STRINGS.UTF8_BOM, c)) {
        if (this.sectionIndex === STRINGS.UTF8_BOM.length) {
            this.setResult("utf-8", ResultType.BOM);
        }
    }
    else {
        this.state = State.BeforeTag;
        this.stateBeforeTag(c);
    }
};
/** Outside of any tag: a `<` starts a new tag, every other byte is ignored. */
Sniffer.prototype.stateBeforeTag = function (c) {
    if (c !== Chars.LT) {
        return;
    }
    // A new tag starts; it is not known to be a `<meta>` tag yet.
    this.inMetaTag = false;
    this.state = State.BeforeTagName;
};
/**
 * Decides what kind of section follows a `<`.
 *
 * - A letter starts a tag name; `m`/`M` may be the start of `meta`.
 * - `/` starts a closing tag.
 * - `!` may start a comment (`<!--`).
 * - `?` starts a section that is skipped up to the next `>`.
 * - Anything else means this was not a tag; the byte is reconsumed.
 */
Sniffer.prototype.stateBeforeTagName = function (c) {
    if (isAsciiAlpha(c)) {
        var mayBeMeta = (c | 0x20) === STRINGS.META[0];
        if (mayBeMeta) {
            // The first letter of `meta` has been matched.
            this.sectionIndex = 1;
        }
        this.state = mayBeMeta ? State.TagNameMeta : State.TagNameOther;
        return;
    }
    if (c === Chars.SLASH) {
        this.state = State.BeforeCloseTagName;
    }
    else if (c === Chars.EXCLAMATION) {
        this.state = State.CommentStart;
        // `<!` has been matched.
        this.sectionIndex = 2;
    }
    else if (c === Chars.QUESTION) {
        this.state = State.WeirdTag;
    }
    else {
        this.state = State.BeforeTag;
        this.stateBeforeTag(c);
    }
};
/** Handles the byte after `</`. */
Sniffer.prototype.stateBeforeCloseTagName = function (c) {
    if (isAsciiAlpha(c)) {
        // Switch to `TagNameOther`; the HTML spec allows attributes here as well.
        this.state = State.TagNameOther;
    }
    else {
        this.state = State.WeirdTag;
    }
};
/**
 * Matches the rest of `<!--` after `<!`. Anything else is skipped up to the
 * next `>`.
 */
Sniffer.prototype.stateCommentStart = function (c) {
    if (!this.advanceSection(STRINGS.COMMENT_START, c)) {
        this.state = State.WeirdTag;
        this.stateWeirdTag(c);
        return;
    }
    if (this.sectionIndex === STRINGS.COMMENT_START.length) {
        this.state = State.CommentEnd;
        // The -- of the comment start can be part of the end.
        this.sectionIndex = 2;
    }
};
/** Inside a comment: looks for the closing `-->`. */
Sniffer.prototype.stateCommentEnd = function (c) {
    if (this.advanceSection(STRINGS.COMMENT_END, c)) {
        if (this.sectionIndex === STRINGS.COMMENT_END.length) {
            this.state = State.BeforeTag;
        }
        return;
    }
    if (c === Chars.DASH) {
        /*
         * If we are here, we know we expected a `>` above.
         * Set this to 2, to support many dashes before the closing `>`.
         */
        this.sectionIndex = 2;
    }
};
/**
 * Skips a section that starts with `<!`, `<?` or `</` but is neither a
 * closing tag nor a comment. The section ends at the next `>`.
 */
Sniffer.prototype.stateWeirdTag = function (c) {
    var sectionEnded = c === Chars.GT;
    if (sectionEnded) {
        this.state = State.BeforeTag;
    }
};
/**
 * Case-insensitive variant of `advanceSection`: sets bit 0x20 of `c` (which
 * maps `A`-`Z` onto `a`-`z`) before comparing.
 *
 * Make sure the section has left-over characters before calling.
 *
 * @returns `false` if we did not match the section.
 */
Sniffer.prototype.advanceSectionIC = function (section, c) {
    var lowered = c | 0x20;
    return this.advanceSection(section, lowered);
};
/**
 * Compares `c` with the byte of `section` at `sectionIndex`. A match moves
 * `sectionIndex` forward by one, a mismatch resets it to 0.
 *
 * Make sure the section has left-over characters before calling.
 *
 * @returns `false` if we did not match the section.
 */
Sniffer.prototype.advanceSection = function (section, c) {
    var matched = section[this.sectionIndex] === c;
    this.sectionIndex = matched ? this.sectionIndex + 1 : 0;
    return matched;
};
/**
 * Inside a tag name that matches `meta` so far (case-insensitively).
 *
 * Once the full name is followed by whitespace we are in a `<meta>` tag and
 * the per-tag pragma bookkeeping is reset. Any other continuation makes
 * this an ordinary tag.
 */
Sniffer.prototype.stateTagNameMeta = function (c) {
    var nameIncomplete = this.sectionIndex < STRINGS.META.length;
    if (nameIncomplete) {
        if (this.advanceSectionIC(STRINGS.META, c)) {
            return;
        }
    }
    else if (SPACE_CHARACTERS.has(c)) {
        this.state = State.BeforeAttribute;
        this.inMetaTag = true;
        this.needsPragma = null;
        this.gotPragma = null;
        return;
    }
    // Reconsume in case there is a `>`.
    this.state = State.TagNameOther;
    this.stateTagNameOther(c);
};
/**
 * Inside the name of a tag other than `<meta>`: `>` ends the tag,
 * whitespace ends the name.
 */
Sniffer.prototype.stateTagNameOther = function (c) {
    if (c === Chars.GT) {
        this.state = State.BeforeTag;
        return;
    }
    if (SPACE_CHARACTERS.has(c)) {
        this.state = State.BeforeAttribute;
    }
};
/**
 * Inside a tag, before an attribute name. Whitespace is skipped; in a
 * `<meta>` tag a leading `h` or `c` (any case) may start one of the
 * attribute names we care about.
 */
Sniffer.prototype.stateBeforeAttribute = function (c) {
    if (SPACE_CHARACTERS.has(c)) {
        return;
    }
    if (this.inMetaTag) {
        switch (c | 0x20) {
            case STRINGS.HTTP_EQUIV[0]: {
                this.sectionIndex = 1;
                this.state = State.MetaAttribHttpEquiv;
                return;
            }
            case STRINGS.CHARSET[0]: {
                // Could be `charset` or `content`; the next byte decides.
                this.sectionIndex = 1;
                this.state = State.MetaAttribC;
                return;
            }
        }
    }
    if (c === Chars.SLASH || c === Chars.GT) {
        this.state = State.BeforeTag;
    }
    else {
        this.state = State.AnyAttribName;
    }
};
/**
 * Shared matcher for the `<meta>` attribute names we care about.
 *
 * @param c Current byte.
 * @param section Attribute name being matched (case-insensitively).
 * @param type Attribute type to record once the whole name matched.
 */
Sniffer.prototype.handleMetaAttrib = function (c, section, type) {
    if (!this.advanceSectionIC(section, c)) {
        // Some other attribute; reconsume the byte as part of its name.
        this.state = State.AnyAttribName;
        this.stateAnyAttribName(c);
        return;
    }
    if (this.sectionIndex === section.length) {
        this.attribType = type;
        this.state = State.MetaAttribAfterName;
    }
};
Sniffer.prototype.stateMetaAttribHttpEquiv = function (c) {
this.handleMetaAttrib(c, STRINGS.HTTP_EQUIV, AttribType.HttpEquiv);
};
/**
 * Second byte of an attribute name that started with `c`: `h` continues
 * `charset`, `o` continues `content`, anything else is some other
 * attribute.
 */
Sniffer.prototype.stateMetaAttribC = function (c) {
    switch (c | 0x20) {
        case STRINGS.CHARSET[1]: {
            this.sectionIndex = 2;
            this.state = State.MetaAttribCharset;
            break;
        }
        case STRINGS.CONTENT[1]: {
            this.sectionIndex = 2;
            this.state = State.MetaAttribContent;
            break;
        }
        default: {
            this.state = State.AnyAttribName;
            this.stateAnyAttribName(c);
        }
    }
};
Sniffer.prototype.stateMetaAttribCharset = function (c) {
this.handleMetaAttrib(c, STRINGS.CHARSET, AttribType.Charset);
};
Sniffer.prototype.stateMetaAttribContent = function (c) {
this.handleMetaAttrib(c, STRINGS.CONTENT, AttribType.Content);
};
/**
 * Directly after a fully matched attribute name: whitespace or `=` confirms
 * the name, any other byte means the name continues and is a different
 * attribute.
 */
Sniffer.prototype.stateMetaAttribAfterName = function (c) {
    var nameEnded = SPACE_CHARACTERS.has(c) || c === Chars.EQUALS;
    if (nameEnded) {
        this.state = State.AfterAttributeName;
        this.stateAfterAttributeName(c);
        return;
    }
    this.state = State.AnyAttribName;
    this.stateAnyAttribName(c);
};
/**
 * Inside the name of an attribute that is not `http-equiv`, `content` or
 * `charset`.
 *
 * Whitespace ends the name, `=` starts the value, `/` or `>` leaves the tag.
 */
Sniffer.prototype.stateAnyAttribName = function (c) {
    /*
     * This attribute is not one we care about. Always clear the attribute
     * type, not only on whitespace: otherwise a type left over from an
     * earlier attribute (or from a longer name such as `charsets`) would make
     * the value of this attribute be read as an encoding.
     */
    this.attribType = AttribType.None;
    if (SPACE_CHARACTERS.has(c)) {
        this.state = State.AfterAttributeName;
    }
    else if (c === Chars.SLASH || c === Chars.GT) {
        this.state = State.BeforeTag;
    }
    else if (c === Chars.EQUALS) {
        this.state = State.BeforeAttributeValue;
    }
};
/**
 * After the name of an attribute, before the equals sign. Whitespace is
 * skipped; a byte other than `=` is reconsumed as the start of the next
 * attribute.
 */
Sniffer.prototype.stateAfterAttributeName = function (c) {
    if (SPACE_CHARACTERS.has(c)) {
        return;
    }
    if (c !== Chars.EQUALS) {
        this.state = State.BeforeAttribute;
        this.stateBeforeAttribute(c);
        return;
    }
    this.state = State.BeforeAttributeValue;
};
/**
 * After `=`: skips whitespace, then picks the value state that fits the
 * attribute type and whether the value is quoted. The first byte of an
 * unquoted value is reconsumed by the chosen state.
 */
Sniffer.prototype.stateBeforeAttributeValue = function (c) {
    if (SPACE_CHARACTERS.has(c)) {
        return;
    }
    // Start with a clean slate for the new value.
    this.sectionIndex = 0;
    this.attributeValue.length = 0;
    var quoted = isQuote(c);
    if (quoted) {
        this.quoteCharacter = c;
    }
    switch (this.attribType) {
        case AttribType.Content: {
            if (quoted) {
                this.state = State.MetaContentValueQuotedBeforeEncoding;
            }
            else {
                this.state = State.MetaContentValueUnquotedBeforeEncoding;
                this.stateMetaContentValueUnquotedBeforeEncoding(c);
            }
            break;
        }
        case AttribType.HttpEquiv: {
            this.state = State.MetaAttribHttpEquivValue;
            if (!quoted) {
                // We use `quoteCharacter = 0` to signify that the value is unquoted.
                this.quoteCharacter = 0;
                this.stateMetaAttribHttpEquivValue(c);
            }
            break;
        }
        default: {
            if (quoted) {
                this.state = State.AttributeValueQuoted;
            }
            else {
                this.state = State.AttributeValueUnquoted;
                this.stateAttributeValueUnquoted(c);
            }
        }
    }
};
/**
 * Reads the value of an `http-equiv` attribute. The value has to be
 * `content-type` (compared case-insensitively).
 *
 * On a complete match, an encoding that an earlier `content` attribute of
 * this tag left in `needsPragma` is applied; otherwise `gotPragma` is set.
 * Any other value sets `gotPragma` to `false` and is skipped as an ordinary
 * attribute value.
 */
Sniffer.prototype.stateMetaAttribHttpEquivValue = function (c) {
    if (this.sectionIndex === STRINGS.CONTENT_TYPE.length) {
        var valueEnded = this.quoteCharacter === 0
            ? END_OF_UNQUOTED_ATTRIBUTE_VALUE.has(c)
            : c === this.quoteCharacter;
        if (valueEnded) {
            if (this.needsPragma !== null) {
                this.setResult(this.needsPragma, ResultType.META_TAG);
            }
            else if (this.gotPragma === null) {
                this.gotPragma = true;
            }
            /*
             * An unquoted value can be ended by the `>` that closes the tag.
             * In that case we have left the tag and must not treat what
             * follows as attributes.
             */
            this.state =
                c === Chars.GT ? State.BeforeTag : State.BeforeAttribute;
            return;
        }
    }
    else if (this.advanceSectionIC(STRINGS.CONTENT_TYPE, c)) {
        return;
    }
    // Not `content-type`; skip the rest of the value.
    this.gotPragma = false;
    if (this.quoteCharacter === 0) {
        this.state = State.AttributeValueUnquoted;
        this.stateAttributeValueUnquoted(c);
    }
    else {
        this.state = State.AttributeValueQuoted;
        this.stateAttributeValueQuoted(c);
    }
};
/**
 * Consumes the encoding label collected from a `content` attribute.
 *
 * If the pragma was seen, the label is applied right away. Otherwise the
 * first such label of the tag is kept in `needsPragma`.
 */
Sniffer.prototype.handleMetaContentValue = function () {
    var collected = this.attributeValue;
    if (collected.length === 0) {
        return;
    }
    var label = String.fromCharCode.apply(String, collected);
    if (this.gotPragma) {
        this.setResult(label, ResultType.META_TAG);
    }
    else if (this.needsPragma === null) {
        // Don't override a previous result.
        this.needsPragma = label;
    }
    collected.length = 0;
};
/** Applies the collected value as an encoding if the attribute was `charset`. */
Sniffer.prototype.handleAttributeValue = function () {
    if (this.attribType !== AttribType.Charset) {
        return;
    }
    var label = String.fromCharCode.apply(String, this.attributeValue);
    this.setResult(label, ResultType.META_TAG);
};
/**
 * Inside an unquoted attribute value. Whitespace ends the value, `/` or `>`
 * additionally leaves the tag. The bytes of a `charset` value are collected
 * with bit 0x20 set.
 */
Sniffer.prototype.stateAttributeValueUnquoted = function (c) {
    var isSpace = SPACE_CHARACTERS.has(c);
    if (isSpace || c === Chars.SLASH || c === Chars.GT) {
        this.handleAttributeValue();
        this.state = isSpace ? State.BeforeAttribute : State.BeforeTag;
        return;
    }
    if (this.attribType === AttribType.Charset) {
        this.attributeValue.push(c | 0x20);
    }
};
/**
 * Searches a `content` value for the word `charset`, ignoring case.
 *
 * @param c Current byte.
 * @returns `true` once the whole word has been matched.
 */
Sniffer.prototype.findMetaContentEncoding = function (c) {
    if (this.advanceSectionIC(STRINGS.CHARSET, c)) {
        if (this.sectionIndex === STRINGS.CHARSET.length) {
            return true;
        }
    }
    else {
        /*
         * If we encountered another `c`, assume we started over. The match
         * above ignores case, so the restart check has to as well
         * (otherwise `CCHARSET` is missed while `ccharset` is found).
         */
        this.sectionIndex = Number((c | 0x20) === STRINGS.CHARSET[0]);
    }
    return false;
};
/**
 * Inside an unquoted `content` value, looking for `charset=`.
 *
 * Whitespace or `>` ends the attribute value. After the word `charset` the
 * next byte has to be `=`; any other byte restarts the search, matching
 * what the quoted variant does.
 */
Sniffer.prototype.stateMetaContentValueUnquotedBeforeEncoding = function (c) {
    if (END_OF_UNQUOTED_ATTRIBUTE_VALUE.has(c)) {
        this.stateAttributeValueUnquoted(c);
    }
    else if (this.sectionIndex === STRINGS.CHARSET.length) {
        if (c === Chars.EQUALS) {
            this.state = State.MetaContentValueUnquotedBeforeValue;
        }
        else {
            // `charset` was not followed by `=`; look for the next occurrence.
            this.sectionIndex = 0;
            this.findMetaContentEncoding(c);
        }
    }
    else {
        this.findMetaContentEncoding(c);
    }
};
/**
 * Unquoted `content` value, directly after `charset=`: decides whether the
 * encoding label itself is quoted.
 */
Sniffer.prototype.stateMetaContentValueUnquotedBeforeValue = function (c) {
    if (isQuote(c)) {
        this.quoteCharacter = c;
        this.state = State.MetaContentValueUnquotedValueQuoted;
        return;
    }
    if (END_OF_UNQUOTED_ATTRIBUTE_VALUE.has(c)) {
        // Can't have spaces here, as it would no longer be part of the attribute value.
        this.stateAttributeValueUnquoted(c);
        return;
    }
    this.state = State.MetaContentValueUnquotedValueUnquoted;
    this.stateMetaContentValueUnquotedValueUnquoted(c);
};
/**
 * Unquoted `content` value, inside a quoted encoding label. The label ends
 * at the matching quote; the end of the attribute value before that
 * discards it.
 */
Sniffer.prototype.stateMetaContentValueUnquotedValueQuoted = function (c) {
    if (END_OF_UNQUOTED_ATTRIBUTE_VALUE.has(c)) {
        // Quotes weren't matched, so we're done.
        this.stateAttributeValueUnquoted(c);
        return;
    }
    if (c !== this.quoteCharacter) {
        this.attributeValue.push(c | 0x20);
        return;
    }
    this.handleMetaContentValue();
    this.state = State.AttributeValueUnquoted;
};
/**
 * Unquoted `content` value, inside an unquoted encoding label. The label
 * ends at `;` or at the end of the attribute value.
 */
Sniffer.prototype.stateMetaContentValueUnquotedValueUnquoted = function (c) {
    var labelEnded = END_OF_UNQUOTED_ATTRIBUTE_VALUE.has(c) || c === Chars.SEMICOLON;
    if (!labelEnded) {
        this.attributeValue.push(c | 0x20);
        return;
    }
    this.handleMetaContentValue();
    this.state = State.AttributeValueUnquoted;
    this.stateAttributeValueUnquoted(c);
};
/**
 * Quoted `content` value, inside an unquoted encoding label. The label ends
 * at a quote, whitespace or `;`.
 */
Sniffer.prototype.stateMetaContentValueQuotedValueUnquoted = function (c) {
    var labelEnded = isQuote(c) || SPACE_CHARACTERS.has(c) || c === Chars.SEMICOLON;
    if (!labelEnded) {
        this.attributeValue.push(c | 0x20);
        return;
    }
    this.handleMetaContentValue();
    // We are done with the value, but might not be at the end of the attribute
    this.state = State.AttributeValueQuoted;
    this.stateAttributeValueQuoted(c);
};
/**
 * Quoted `content` value, inside an encoding label that has quotes of its
 * own. Any quote ends the label.
 */
Sniffer.prototype.stateMetaContentValueQuotedValueQuoted = function (c) {
    if (!isQuote(c)) {
        this.attributeValue.push(c | 0x20);
        return;
    }
    // We have reached the end of our value.
    if (c !== this.quoteCharacter) {
        // Only handle the value if inner quotes were matched.
        this.handleMetaContentValue();
    }
    this.state = State.AttributeValueQuoted;
    this.stateAttributeValueQuoted(c);
};
/**
 * Inside a quoted `content` value, looking for the word `charset`. The
 * closing quote ends the attribute value.
 */
Sniffer.prototype.stateMetaContentValueQuotedBeforeEncoding = function (c) {
    if (c === this.quoteCharacter) {
        this.stateAttributeValueQuoted(c);
        return;
    }
    var foundCharset = this.findMetaContentEncoding(c);
    if (foundCharset) {
        this.state = State.MetaContentValueQuotedAfterEncoding;
    }
};
/**
 * Quoted `content` value, after the word `charset`: whitespace is skipped
 * and `=` is expected. Any other byte restarts the search for `charset`.
 */
Sniffer.prototype.stateMetaContentValueQuotedAfterEncoding = function (c) {
    if (c === Chars.EQUALS) {
        this.state = State.MetaContentValueQuotedBeforeValue;
        return;
    }
    if (SPACE_CHARACTERS.has(c)) {
        return;
    }
    // Look for the next encoding
    this.state = State.MetaContentValueQuotedBeforeEncoding;
    this.stateMetaContentValueQuotedBeforeEncoding(c);
};
/**
 * Quoted `content` value, after `charset=`: whitespace is skipped, the
 * closing quote ends the attribute value, another quote starts a quoted
 * label, and anything else starts an unquoted label.
 */
Sniffer.prototype.stateMetaContentValueQuotedBeforeValue = function (c) {
    if (c === this.quoteCharacter) {
        this.stateAttributeValueQuoted(c);
        return;
    }
    if (isQuote(c)) {
        this.state = State.MetaContentValueQuotedValueQuoted;
        return;
    }
    if (SPACE_CHARACTERS.has(c)) {
        return;
    }
    this.state = State.MetaContentValueQuotedValueUnquoted;
    this.stateMetaContentValueQuotedValueUnquoted(c);
};
/**
 * Inside a quoted attribute value. The matching quote ends the value; the
 * bytes of a `charset` value are collected with bit 0x20 set.
 */
Sniffer.prototype.stateAttributeValueQuoted = function (c) {
    if (c === this.quoteCharacter) {
        this.handleAttributeValue();
        this.state = State.BeforeAttribute;
        return;
    }
    if (this.attribType === AttribType.Charset) {
        this.attributeValue.push(c | 0x20);
    }
};
/**
 * Matches the rest of `STRINGS.XML_DECLARATION` (`<?xml`) after `<?`.
 *
 * Anything else is skipped up to the next `>`. The mismatching byte is
 * reconsumed, as it may already be that `>` (e.g. `<?>`).
 */
Sniffer.prototype.stateXMLDeclaration = function (c) {
    if (this.advanceSection(STRINGS.XML_DECLARATION, c)) {
        if (this.sectionIndex === STRINGS.XML_DECLARATION.length) {
            // Start matching `encoding` from its first byte.
            this.sectionIndex = 0;
            this.state = State.XMLDeclarationBeforeEncoding;
        }
    }
    else {
        this.state = State.WeirdTag;
        this.stateWeirdTag(c);
    }
};
/**
 * Inside an XML declaration, looking for the word `encoding`
 * (case-sensitively). A `>` ends the declaration.
 */
Sniffer.prototype.stateXMLDeclarationBeforeEncoding = function (c) {
    if (this.advanceSection(STRINGS.ENCODING, c)) {
        if (this.sectionIndex === STRINGS.ENCODING.length) {
            this.state = State.XMLDeclarationAfterEncoding;
        }
        return;
    }
    if (c === Chars.GT) {
        this.state = State.BeforeTag;
        return;
    }
    // If we encountered another `c`, assume we started over.
    this.sectionIndex = c === STRINGS.ENCODING[0] ? 1 : 0;
};
/**
 * XML declaration, after the word `encoding`: `=` is expected. Bytes up to
 * and including SPACE are skipped; anything else abandons the declaration.
 */
Sniffer.prototype.stateXMLDeclarationAfterEncoding = function (c) {
    if (c === Chars.EQUALS) {
        this.state = State.XMLDeclarationBeforeValue;
        return;
    }
    if (c > Chars.SPACE) {
        this.state = State.WeirdTag;
        this.stateWeirdTag(c);
    }
};
/**
 * XML declaration, after `encoding=`: a quote starts the value. Bytes up to
 * and including SPACE are skipped; anything else abandons the declaration.
 */
Sniffer.prototype.stateXMLDeclarationBeforeValue = function (c) {
    if (isQuote(c)) {
        this.attributeValue.length = 0;
        this.state = State.XMLDeclarationValue;
        return;
    }
    if (c > Chars.SPACE) {
        this.state = State.WeirdTag;
        this.stateWeirdTag(c);
    }
};
/**
 * Inside the quoted value of the XML `encoding` attribute. A quote ends the
 * value and records it; `>` or a byte up to and including SPACE abandons
 * it.
 */
Sniffer.prototype.stateXMLDeclarationValue = function (c) {
    if (isQuote(c)) {
        var label = String.fromCharCode.apply(String, this.attributeValue);
        this.setResult(label, ResultType.XML_ENCODING);
        this.state = State.WeirdTag;
        return;
    }
    if (c === Chars.GT) {
        this.state = State.BeforeTag;
        return;
    }
    if (c <= Chars.SPACE) {
        this.state = State.WeirdTag;
        return;
    }
    this.attributeValue.push(c | 0x20);
};
/**
 * Feeds a chunk of bytes to the state machine.
 *
 * Bytes are processed until the chunk ends or `maxBytes` bytes have been
 * seen across all calls. `offset` is advanced by the number of bytes
 * consumed from this chunk.
 *
 * @param buffer Chunk to inspect; it is indexed and searched with `indexOf`.
 */
Sniffer.prototype.write = function (buffer) {
    var index = 0;
    for (; index < buffer.length && this.offset + index < this.maxBytes; index++) {
        var c = buffer[index];
        switch (this.state) {
            case State.Begin: {
                this.stateBegin(c);
                break;
            }
            case State.BOM16BE: {
                this.stateBOM16BE(c);
                break;
            }
            case State.BOM16LE: {
                this.stateBOM16LE(c);
                break;
            }
            case State.BOM8: {
                this.stateBOM8(c);
                break;
            }
            case State.UTF16LE_XML_PREFIX: {
                this.stateUTF16LE_XML_PREFIX(c);
                break;
            }
            case State.BeginLT: {
                this.stateBeginLT(c);
                break;
            }
            case State.UTF16BE_XML_PREFIX: {
                this.stateUTF16BE_XML_PREFIX(c);
                break;
            }
            case State.BeforeTag: {
                // Optimization: Skip all characters until we find a `<`
                var idx = buffer.indexOf(Chars.LT, index);
                if (idx < 0 || this.offset + idx >= this.maxBytes) {
                    /*
                     * No `<` within the bytes we are allowed to look at.
                     * Stay in the state and try on the next buffer.
                     *
                     * Point at the last byte: the loop increment then moves
                     * `index` to `buffer.length`, so `offset` grows by
                     * exactly the size of this buffer.
                     */
                    index = buffer.length - 1;
                }
                else {
                    index = idx;
                    this.stateBeforeTag(Chars.LT);
                }
                break;
            }
            case State.BeforeTagName: {
                this.stateBeforeTagName(c);
                break;
            }
            case State.BeforeCloseTagName: {
                this.stateBeforeCloseTagName(c);
                break;
            }
            case State.CommentStart: {
                this.stateCommentStart(c);
                break;
            }
            case State.CommentEnd: {
                this.stateCommentEnd(c);
                break;
            }
            case State.TagNameMeta: {
                this.stateTagNameMeta(c);
                break;
            }
            case State.TagNameOther: {
                this.stateTagNameOther(c);
                break;
            }
            case State.XMLDeclaration: {
                this.stateXMLDeclaration(c);
                break;
            }
            case State.XMLDeclarationBeforeEncoding: {
                this.stateXMLDeclarationBeforeEncoding(c);
                break;
            }
            case State.XMLDeclarationAfterEncoding: {
                this.stateXMLDeclarationAfterEncoding(c);
                break;
            }
            case State.XMLDeclarationBeforeValue: {
                this.stateXMLDeclarationBeforeValue(c);
                break;
            }
            case State.XMLDeclarationValue: {
                this.stateXMLDeclarationValue(c);
                break;
            }
            case State.WeirdTag: {
                this.stateWeirdTag(c);
                break;
            }
            case State.BeforeAttribute: {
                this.stateBeforeAttribute(c);
                break;
            }
            case State.MetaAttribHttpEquiv: {
                this.stateMetaAttribHttpEquiv(c);
                break;
            }
            case State.MetaAttribHttpEquivValue: {
                this.stateMetaAttribHttpEquivValue(c);
                break;
            }
            case State.MetaAttribC: {
                this.stateMetaAttribC(c);
                break;
            }
            case State.MetaAttribContent: {
                this.stateMetaAttribContent(c);
                break;
            }
            case State.MetaAttribCharset: {
                this.stateMetaAttribCharset(c);
                break;
            }
            case State.MetaAttribAfterName: {
                this.stateMetaAttribAfterName(c);
                break;
            }
            case State.MetaContentValueQuotedBeforeEncoding: {
                this.stateMetaContentValueQuotedBeforeEncoding(c);
                break;
            }
            case State.MetaContentValueQuotedAfterEncoding: {
                this.stateMetaContentValueQuotedAfterEncoding(c);
                break;
            }
            case State.MetaContentValueQuotedBeforeValue: {
                this.stateMetaContentValueQuotedBeforeValue(c);
                break;
            }
            case State.MetaContentValueQuotedValueQuoted: {
                this.stateMetaContentValueQuotedValueQuoted(c);
                break;
            }
            case State.MetaContentValueQuotedValueUnquoted: {
                this.stateMetaContentValueQuotedValueUnquoted(c);
                break;
            }
            case State.MetaContentValueUnquotedBeforeEncoding: {
                this.stateMetaContentValueUnquotedBeforeEncoding(c);
                break;
            }
            case State.MetaContentValueUnquotedBeforeValue: {
                this.stateMetaContentValueUnquotedBeforeValue(c);
                break;
            }
            case State.MetaContentValueUnquotedValueQuoted: {
                this.stateMetaContentValueUnquotedValueQuoted(c);
                break;
            }
            case State.MetaContentValueUnquotedValueUnquoted: {
                this.stateMetaContentValueUnquotedValueUnquoted(c);
                break;
            }
            case State.AnyAttribName: {
                this.stateAnyAttribName(c);
                break;
            }
            case State.AfterAttributeName: {
                this.stateAfterAttributeName(c);
                break;
            }
            case State.BeforeAttributeValue: {
                this.stateBeforeAttributeValue(c);
                break;
            }
            case State.AttributeValueQuoted: {
                this.stateAttributeValueQuoted(c);
                break;
            }
            default: {
                // (State.AttributeValueUnquoted)
                this.stateAttributeValueUnquoted(c);
            }
        }
    }
    this.offset += index;
};
return Sniffer;
}());
export { Sniffer };
/**
 * Sniffs the encoding of a single, complete buffer.
 *
 * @param buffer Bytes to inspect.
 * @param options Passed on to the `Sniffer` constructor.
 * @returns The `encoding` of the sniffer after it has seen `buffer`.
 */
export function getEncoding(buffer, options) {
    var detector = new Sniffer(options);
    detector.write(buffer);
    var detected = detector.encoding;
    return detected;
}
//# sourceMappingURL=sniffer.js.map

File diff suppressed because one or more lines are too long

108
book/node_modules/encoding-sniffer/package.json generated vendored Normal file
View File

@@ -0,0 +1,108 @@
{
"name": "encoding-sniffer",
"version": "0.2.0",
"description": "Implementation of the HTML encoding sniffer algo, with stream support",
"bugs": {
"url": "https://github.com/fb55/encoding-sniffer/issues"
},
"repository": {
"type": "git",
"url": "git://github.com/fb55/encoding-sniffer.git"
},
"funding": "https://github.com/fb55/encoding-sniffer?sponsor=1",
"license": "MIT",
"author": "Felix Boehm <me@feedic.com>",
"type": "module",
"exports": {
".": {
"import": {
"source": "./src/index.ts",
"types": "./dist/esm/index.d.ts",
"default": "./dist/esm/index.js"
},
"require": {
"source": "./src/index.ts",
"types": "./dist/commonjs/index.d.ts",
"default": "./dist/commonjs/index.js"
}
},
"./sniffer": {
"import": {
"source": "./src/sniffer.ts",
"types": "./dist/esm/sniffer.d.ts",
"default": "./dist/esm/sniffer.js"
},
"require": {
"source": "./src/sniffer.ts",
"types": "./dist/commonjs/sniffer.d.ts",
"default": "./dist/commonjs/sniffer.js"
}
}
},
"main": "./dist/commonjs/index.js",
"types": "./dist/commonjs/index.d.ts",
"files": [
"dist"
],
"scripts": {
"build": "tshy",
"build:docs": "typedoc --hideGenerator src/index.ts",
"format": "npm run format:es && npm run format:prettier",
"format:es": "npm run lint:es -- --fix",
"format:prettier": "npm run prettier -- --write",
"lint": "npm run lint:es && npm run lint:prettier",
"lint:es": "eslint --ignore-path .gitignore .",
"lint:prettier": "npm run prettier -- --check",
"make-cjs": "sed -i '' 's/\"type\": \"module\"/\"type\": \"commonjs\"/g' package.json",
"make-esm": "sed -i '' 's/\"type\": \"commonjs\"/\"type\": \"module\"/g' package.json",
"prepublishOnly": "npm run build",
"prettier": "prettier '**/*.{ts,md,json,yml}'",
"test": "npm run test:jest && npm run lint",
"test:jest": "jest"
},
"prettier": {
"proseWrap": "always",
"tabWidth": 4
},
"jest": {
"coverageProvider": "v8",
"moduleNameMapper": {
"^(.*)\\.js$": [
"$1",
"$1.js"
]
},
"preset": "ts-jest",
"testEnvironment": "node"
},
"dependencies": {
"iconv-lite": "^0.6.3",
"whatwg-encoding": "^3.1.1"
},
"devDependencies": {
"@types/jest": "^29.5.12",
"@types/node": "^20.14.2",
"@types/whatwg-encoding": "^2.0.3",
"@typescript-eslint/eslint-plugin": "^7.13.0",
"@typescript-eslint/parser": "^7.13.0",
"eslint": "^8.57.0",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-n": "^17.9.0",
"eslint-plugin-unicorn": "^54.0.0",
"jest": "^29.7.0",
"prettier": "^3.3.2",
"ts-jest": "^29.1.4",
"tshy": "^1.15.1",
"typedoc": "^0.25.13",
"typescript": "^5.4.5"
},
"tshy": {
"exports": {
".": "./src/index.ts",
"./sniffer": "./src/sniffer.ts"
},
"exclude": [
"src/**/*.spec.ts"
]
}
}