| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131 |
- "use strict";
// Default-import interop helper: reuses `this.__importDefault` when one is
// already present; otherwise returns the module unchanged if it is flagged
// with a truthy `__esModule`, and wraps any other value as `{ default: value }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
- Object.defineProperty(exports, "__esModule", { value: true });
- exports.decodeHtml = void 0;
- const namedChars_json_1 = __importDefault(require("./namedChars.json"));
// Length of the longest key in namedChars.json. Computed lazily on the first
// named-reference lookup (rather than at module load) to keep this file
// tree-shakable for browser builds.
let maxCRNameLength;
/**
 * Decodes HTML character references (named, decimal and hexadecimal) in a string.
 *
 * Named references are resolved by longest match against the keys of
 * namedChars.json; text that does not form a known reference is copied through
 * unchanged. Numeric references are sanitized following
 * https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
 *
 * @param {string} rawText - Text that may contain character references.
 * @param {boolean} [asAttr] - When truthy, a named reference that lacks its
 *   trailing ";" and is immediately followed by "=" or an ASCII alphanumeric
 *   is left undecoded.
 * @returns {string} The decoded text.
 */
const decodeHtml = (rawText, asAttr) => {
    // Hoisted out of the scanning loop; none of these use the g/y flags, so
    // they carry no state between calls.
    const headPattern = /&(?:#x?)?/i;
    const hexBodyPattern = /^&#x([0-9a-f]+);?/i;
    const decBodyPattern = /^&#([0-9]+);?/;
    const hasOwn = Object.prototype.hasOwnProperty;
    let decodedText = '';
    // Part of the input that has not been consumed yet.
    let rest = rawText;
    while (rest.length > 0) {
        const head = headPattern.exec(rest);
        if (!head) {
            // No further "&": everything left is literal text.
            decodedText += rest;
            break;
        }
        // Copy the literal text in front of the "&" and move up to it.
        decodedText += rest.slice(0, head.index);
        rest = rest.slice(head.index);
        if (head[0] === '&') {
            // Named character reference.
            let name = '';
            let value = undefined;
            if (/[0-9a-z]/i.test(rest.charAt(1))) {
                const namedChars = namedChars_json_1.default;
                if (!maxCRNameLength) {
                    maxCRNameLength = Object.keys(namedChars).reduce((max, key) => Math.max(max, key.length), 0);
                }
                // Longest match first. Candidates longer than the remaining
                // text would all be the same slice, so start at that cap.
                const longest = Math.min(maxCRNameLength, rest.length - 1);
                for (let length = longest; !value && length > 0; --length) {
                    name = rest.slice(1, 1 + length);
                    // Own-property check: names such as "constructor" or
                    // "toString" must not resolve to Object.prototype members.
                    value = hasOwn.call(namedChars, name) ? namedChars[name] : undefined;
                }
            }
            if (value) {
                const semi = name.endsWith(';');
                const keepRaw = asAttr &&
                    !semi &&
                    /[=a-z0-9]/i.test(rest[name.length + 1] || '');
                decodedText += keepRaw ? '&' + name : value;
            }
            else {
                // Unknown reference: emit the "&" plus whatever candidate was
                // tried last (a single character, or nothing for a bare "&").
                decodedText += '&' + name;
            }
            rest = rest.slice(1 + name.length);
        }
        else {
            // Numeric character reference. The head pattern is
            // case-insensitive, so both "&#x" and "&#X" select hexadecimal.
            const hex = head[0].toLowerCase() === '&#x';
            const body = (hex ? hexBodyPattern : decBodyPattern).exec(rest);
            if (!body) {
                // "&#" / "&#x" without digits: keep it as literal text.
                decodedText += head[0];
                rest = rest.slice(head[0].length);
            }
            else {
                let cp = Number.parseInt(body[1], hex ? 16 : 10);
                if (cp === 0 || cp > 0x10ffff || (cp >= 0xd800 && cp <= 0xdfff)) {
                    // Null, out-of-range and surrogate code points become U+FFFD.
                    cp = 0xfffd;
                }
                else if ((cp >= 0x01 && cp <= 0x08) ||
                    cp === 0x0b ||
                    (cp >= 0x0d && cp <= 0x1f) ||
                    (cp >= 0x7f && cp <= 0x9f)) {
                    // Control characters: apply the replacement table where it
                    // has an entry, otherwise keep the code point as is.
                    cp = CCR_REPLACEMENTS[cp] || cp;
                }
                // Noncharacters (U+FDD0..U+FDEF, U+xFFFE, U+xFFFF) pass through.
                decodedText += String.fromCodePoint(cp);
                rest = rest.slice(body[0].length);
            }
        }
    }
    return decodedText;
};
- exports.decodeHtml = decodeHtml;
// Replacement table for numeric character references in the 0x80-0x9F range,
// as listed in
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
// Keys are the referenced code points, values the code points emitted instead.
// Frozen so this shared lookup table cannot be modified at runtime.
const CCR_REPLACEMENTS = Object.freeze({
    0x80: 0x20ac, // EURO SIGN
    0x82: 0x201a, // SINGLE LOW-9 QUOTATION MARK
    0x83: 0x0192, // LATIN SMALL LETTER F WITH HOOK
    0x84: 0x201e, // DOUBLE LOW-9 QUOTATION MARK
    0x85: 0x2026, // HORIZONTAL ELLIPSIS
    0x86: 0x2020, // DAGGER
    0x87: 0x2021, // DOUBLE DAGGER
    0x88: 0x02c6, // MODIFIER LETTER CIRCUMFLEX ACCENT
    0x89: 0x2030, // PER MILLE SIGN
    0x8a: 0x0160, // LATIN CAPITAL LETTER S WITH CARON
    0x8b: 0x2039, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    0x8c: 0x0152, // LATIN CAPITAL LIGATURE OE
    0x8e: 0x017d, // LATIN CAPITAL LETTER Z WITH CARON
    0x91: 0x2018, // LEFT SINGLE QUOTATION MARK
    0x92: 0x2019, // RIGHT SINGLE QUOTATION MARK
    0x93: 0x201c, // LEFT DOUBLE QUOTATION MARK
    0x94: 0x201d, // RIGHT DOUBLE QUOTATION MARK
    0x95: 0x2022, // BULLET
    0x96: 0x2013, // EN DASH
    0x97: 0x2014, // EM DASH
    0x98: 0x02dc, // SMALL TILDE
    0x99: 0x2122, // TRADE MARK SIGN
    0x9a: 0x0161, // LATIN SMALL LETTER S WITH CARON
    0x9b: 0x203a, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    0x9c: 0x0153, // LATIN SMALL LIGATURE OE
    0x9e: 0x017e, // LATIN SMALL LETTER Z WITH CARON
    0x9f: 0x0178, // LATIN CAPITAL LETTER Y WITH DIAERESIS
});
|