Skip to content

Commit 74fed43

Browse files
committed
lib: implement all 1-byte encodings in js
1 parent 2e597de commit 74fed43

File tree

6 files changed

+190
-164
lines changed

6 files changed

+190
-164
lines changed

lib/internal/encoding.js

Lines changed: 40 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -13,24 +13,27 @@ const {
1313
StringPrototypeSlice,
1414
Symbol,
1515
SymbolToStringTag,
16-
Uint8Array,
1716
} = primordials;
1817

18+
const { FastBuffer } = require('internal/buffer');
19+
1920
const {
2021
ERR_ENCODING_NOT_SUPPORTED,
2122
ERR_INVALID_ARG_TYPE,
2223
ERR_INVALID_THIS,
2324
ERR_NO_ICU,
2425
} = require('internal/errors').codes;
26+
const kMethod = Symbol('method');
2527
const kHandle = Symbol('handle');
2628
const kFlags = Symbol('flags');
2729
const kEncoding = Symbol('encoding');
2830
const kDecoder = Symbol('decoder');
2931
const kFatal = Symbol('kFatal');
3032
const kUTF8FastPath = Symbol('kUTF8FastPath');
31-
const kWindows1252FastPath = Symbol('kWindows1252FastPath');
3233
const kIgnoreBOM = Symbol('kIgnoreBOM');
3334

35+
const { isSinglebyteEncoding, createSinglebyteDecoder } = require('internal/encoding/single-byte');
36+
3437
const {
3538
getConstructorOf,
3639
customInspectSymbol: inspect,
@@ -55,11 +58,8 @@ const {
5558
encodeIntoResults,
5659
encodeUtf8String,
5760
decodeUTF8,
58-
decodeWindows1252,
5961
} = binding;
6062

61-
const { Buffer } = require('buffer');
62-
6363
function validateDecoder(obj) {
6464
if (obj == null || obj[kDecoder] !== true)
6565
throw new ERR_INVALID_THIS('TextDecoder');
@@ -69,7 +69,7 @@ const CONVERTER_FLAGS_FLUSH = 0x1;
6969
const CONVERTER_FLAGS_FATAL = 0x2;
7070
const CONVERTER_FLAGS_IGNORE_BOM = 0x4;
7171

72-
const empty = new Uint8Array(0);
72+
const empty = new FastBuffer();
7373

7474
const encodings = new SafeMap([
7575
['unicode-1-1-utf-8', 'utf-8'],
@@ -387,6 +387,24 @@ ObjectDefineProperties(
387387
[SymbolToStringTag]: { __proto__: null, configurable: true, value: 'TextEncoder' },
388388
});
389389

390+
/**
 * Normalizes the `input` argument of `TextDecoder.prototype.decode()` into a
 * FastBuffer.
 *
 * An ArrayBuffer/SharedArrayBuffer is passed to the FastBuffer constructor
 * as-is; an ArrayBufferView is re-created from its `buffer`, `byteOffset`
 * and `byteLength`.
 * @param {ArrayBuffer|SharedArrayBuffer|ArrayBufferView} input
 * @returns {FastBuffer} A FastBuffer for `input`, or the shared `empty`
 *   buffer when constructing one throws.
 * @throws {ERR_INVALID_ARG_TYPE} If `input` is neither kind of object.
 */
function parseInput(input) {
  const isBuffer = isAnyArrayBuffer(input);
  if (!isBuffer && !isArrayBufferView(input)) {
    throw new ERR_INVALID_ARG_TYPE('input', ['ArrayBuffer', 'ArrayBufferView'], input);
  }

  try {
    // Property reads on the view stay inside the try so that any failure
    // there also degrades to the empty buffer.
    // NOTE(review): presumably this guards against detached buffers - confirm.
    return isBuffer ?
      new FastBuffer(input) :
      new FastBuffer(input.buffer, input.byteOffset, input.byteLength);
  } catch {
    return empty;
  }
}
407+
390408
const TextDecoder =
391409
internalBinding('config').hasIntl ?
392410
makeTextDecoderICU() :
@@ -420,10 +438,12 @@ function makeTextDecoderICU() {
420438
this[kFatal] = Boolean(options?.fatal);
421439
// Only support fast path for UTF-8.
422440
this[kUTF8FastPath] = enc === 'utf-8';
423-
this[kWindows1252FastPath] = enc === 'windows-1252';
424441
this[kHandle] = undefined;
442+
this[kMethod] = undefined;
425443

426-
if (!this[kUTF8FastPath] && !this[kWindows1252FastPath]) {
444+
if (isSinglebyteEncoding(this.encoding)) {
445+
this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]);
446+
} else if (!this[kUTF8FastPath]) {
427447
this.#prepareConverter();
428448
}
429449
}
@@ -438,22 +458,18 @@ function makeTextDecoderICU() {
438458

439459
decode(input = empty, options = kEmptyObject) {
440460
validateDecoder(this);
461+
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
462+
463+
if (this[kMethod]) return this[kMethod](parseInput(input));
441464

442465
this[kUTF8FastPath] &&= !(options?.stream);
443-
this[kWindows1252FastPath] &&= !(options?.stream);
444466

445467
if (this[kUTF8FastPath]) {
446468
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
447469
}
448470

449-
if (this[kWindows1252FastPath]) {
450-
return decodeWindows1252(input, this[kIgnoreBOM], this[kFatal]);
451-
}
452-
453471
this.#prepareConverter();
454472

455-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
456-
457473
let flags = 0;
458474
if (options !== null)
459475
flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH;
@@ -476,7 +492,7 @@ function makeTextDecoderJS() {
476492
const kBOMSeen = Symbol('BOM seen');
477493

478494
function hasConverter(encoding) {
479-
return encoding === 'utf-8' || encoding === 'utf-16le';
495+
return encoding === 'utf-8' || encoding === 'utf-16le' || isSinglebyteEncoding(encoding);
480496
}
481497

482498
class TextDecoder {
@@ -502,30 +518,20 @@ function makeTextDecoderJS() {
502518
this[kFlags] = flags;
503519
this[kEncoding] = enc;
504520
this[kBOMSeen] = false;
521+
this[kMethod] = undefined;
522+
523+
if (isSinglebyteEncoding(this.encoding)) {
524+
this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]);
525+
}
505526
}
506527

507528
decode(input = empty, options = kEmptyObject) {
508529
validateDecoder(this);
509-
if (isAnyArrayBuffer(input)) {
510-
try {
511-
input = Buffer.from(input);
512-
} catch {
513-
input = empty;
514-
}
515-
} else if (isArrayBufferView(input)) {
516-
try {
517-
input = Buffer.from(input.buffer, input.byteOffset,
518-
input.byteLength);
519-
} catch {
520-
input = empty;
521-
}
522-
} else {
523-
throw new ERR_INVALID_ARG_TYPE('input',
524-
['ArrayBuffer', 'ArrayBufferView'],
525-
input);
526-
}
530+
input = parseInput(input);
527531
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
528532

533+
if (this[kMethod]) return this[kMethod](input);
534+
529535
if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
530536
this[kBOMSeen] = false;
531537
}
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
// Simplified version extracted from https://npmjs.com/package/@exodus/bytes codepath for 1-byte encodings
2+
// Copyright Exodus Movement. Licensed under MIT License.
3+
4+
'use strict';
5+
6+
const {
7+
Array,
8+
ArrayPrototypeFill,
9+
ObjectKeys,
10+
ObjectPrototypeHasOwnProperty,
11+
SafeArrayIterator,
12+
SafeMap,
13+
SafeSet,
14+
StringPrototypeIncludes,
15+
TypedArrayFrom,
16+
Uint16Array,
17+
} = primordials;
18+
19+
const { Buffer, isAscii } = require('buffer');
20+
21+
const {
22+
ERR_ENCODING_NOT_SUPPORTED,
23+
ERR_ENCODING_INVALID_ENCODED_DATA,
24+
} = require('internal/errors').codes;
25+
26+
let isBigEndian;
27+
28+
const it = (x) => new SafeArrayIterator(x);
29+
30+
/* fallback/single-byte.encodings.js */
31+
32+
const r = 0xfffd;
33+
const e = (x) => it(ArrayPrototypeFill(new Array(x), 1));
34+
const h = (x) => it(ArrayPrototypeFill(new Array(x), r));
35+
36+
/* eslint-disable @stylistic/js/max-len */
37+
38+
// Index tables from https://encoding.spec.whatwg.org/#legacy-single-byte-encodings
39+
// Each table in the spec lists only the mappings from byte 0x80 onwards, as everything below that is ASCII and mapped as identity
40+
// Here, 0xfffd designates a hole (unmapped offset), as not all encodings map all offsets
41+
// All other numbers are deltas from the last seen mapped value, starting with 0x7f (127, highest ASCII)
42+
// Thus, [0x80, 0x81, , 0x83] is stored as [1, 1, r, 2], where r = 0xfffd marks the hole
43+
// Truncation (length < 128) means that all remaining ones are mapped as identity (offset i => codepoint i), not unmapped
44+
const encodings = {
45+
'__proto__': null,
46+
'ibm866': [913, ...e(47), 8530, 1, 1, -145, 34, 61, 1, -12, -1, 14, -18, 6, 6, -1, -1, -75, 4, 32, -8, -16, -28, 60, 34, 1, -5, -6, 21, -3, -6, -16, 28, -5, 1, -4, 1, -12, -1, -6, 1, 24, -1, -82, -12, 124, -4, 8, 4, -16, -8512, ...e(15), -78, 80, -77, 80, -77, 80, -73, 80, -942, 8553, -8546, 8547, -260, -8306, 9468, -9472],
47+
'iso-8859-10': [...e(33), 100, 14, 16, 8, -2, 14, -143, 148, -43, 80, 6, 23, -208, 189, -32, -154, 85, 14, 16, 8, -2, 14, -128, 133, -43, 80, 6, 23, 7831, -7850, -32, -75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1, 1, 1, 117, 7, -121, 1, 1, 1, 146, -144, 154, -152, ...e(5), 34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1, 1, 1, 86, 7, -90, 1, 1, 1, 115, -113, 123, -121, 1, 1, 1, 1, 58],
48+
'iso-8859-13': [...e(33), 8061, -8059, 1, 1, 8058, -8056, 1, 49, -47, 173, -171, 1, 1, 1, 24, -22, 1, 1, 1, 8041, -8039, 1, 1, 65, -63, 158, -156, 1, 1, 1, 40, 30, 42, -46, 6, -66, 1, 83, -6, -6, -67, 176, -99, 12, 20, -12, 17, 37, -29, 2, -114, 121, -119, 1, 1, 155, -49, 25, 16, -142, 159, 2, -158, 38, 42, -46, 6, -35, 1, 52, -6, -6, -36, 145, -99, 12, 20, -12, 17, 37, -29, 2, -83, 90, -88, 1, 1, 124, -49, 25, 16, -111, 128, 2, 7835],
49+
'iso-8859-14': [...e(33), 7522, 1, -7520, 103, 1, 7423, -7523, 7641, -7639, 7641, -119, 231, -7749, 1, 202, 7334, 1, -7423, 1, 7455, 1, -7563, 7584, 43, -42, 44, -35, 147, -111, 1, -36, -7585, ...e(15), 165, -163, ...e(5), 7572, -7570, ...e(5), 153, -151, ...e(16), 134, -132, ...e(5), 7541, -7539, ...e(5), 122],
50+
'iso-8859-15': [...e(33), 1, 1, 1, 8201, -8199, 187, -185, 186, -184, ...e(10), 202, -200, 1, 1, 199, -197, 1, 1, 151, 1, 37],
51+
'iso-8859-16': [...e(33), 100, 1, 60, 8043, -142, -7870, -185, 186, -184, 367, -365, 206, -204, 205, 1, -203, 1, 91, 54, 59, 7840, -8039, 1, 199, -113, 268, -350, 151, 1, 37, 4, -188, 1, 1, 64, -62, 66, -64, ...e(9), 65, 51, -113, 1, 1, 124, -122, 132, 22, -151, 1, 1, 1, 60, 258, -315, 1, 1, 1, 33, -31, 35, -33, ...e(9), 34, 51, -82, 1, 1, 93, -91, 101, 22, -120, 1, 1, 1, 29, 258],
52+
'iso-8859-2': [...e(33), 100, 468, -407, -157, 153, 29, -179, 1, 184, -2, 6, 21, -204, 208, -2, -203, 85, 470, -409, -142, 138, 29, 364, -527, 169, -2, 6, 21, 355, -351, -2, -40, -147, 1, 64, -62, 117, -51, -63, 69, -67, 79, -77, 79, -77, 1, 64, 2, 51, 4, -116, 1, 124, -122, 1, 129, 22, -148, 150, -148, 1, 133, -131, 118, -116, 1, 33, -31, 86, -51, -32, 38, -36, 48, -46, 48, -46, 1, 33, 2, 51, 4, -85, 1, 93, -91, 1, 98, 22, -117, 119, -117, 1, 102, 374],
53+
'iso-8859-3': [...e(33), 134, 434, -565, 1, r, 128, -125, 1, 136, 46, -64, 22, -135, r, 206, -203, 119, -117, 1, 1, 1, 112, -110, 1, 121, 46, -64, 22, -120, r, 191, -188, 1, 1, r, 2, 70, -2, -65, ...e(8), r, 2, 1, 1, 1, 76, -74, 1, 69, -67, 1, 1, 1, 144, -16, -125, 1, 1, 1, r, 2, 39, -2, -34, ...e(8), r, 2, 1, 1, 1, 45, -43, 1, 38, -36, 1, 1, 1, 113, -16, 380],
54+
'iso-8859-4': [...e(33), 100, 52, 30, -178, 132, 19, -148, 1, 184, -78, 16, 68, -185, 208, -206, 1, 85, 470, -388, -163, 117, 19, 395, -527, 169, -78, 16, 68, -29, 52, -51, -75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1, 92, -26, 53, 7, -22, -98, 1, 1, 1, 1, 154, -152, 1, 1, 140, 2, -139, 34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1, 61, -26, 53, 7, -22, -67, 1, 1, 1, 1, 123, -121, 1, 1, 109, 2, 366],
55+
'iso-8859-5': [...e(33), 865, ...e(11), -863, 865, ...e(65), 7367, -7365, ...e(11), -949, 951, 1],
56+
'iso-8859-6': [...e(33), r, r, r, 4, ...h(7), 1384, -1375, ...h(13), 1390, r, r, r, 4, r, 2, ...e(25), r, r, r, r, r, 6, ...e(18), ...h(13)],
57+
'iso-8859-7': [...e(33), 8056, 1, -8054, 8201, 3, -8201, 1, 1, 1, 721, -719, 1, 1, r, 8040, -8037, 1, 1, 1, 721, 1, 1, -719, 721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), 64559],
58+
'iso-8859-8': [...e(33), r, 2, ...e(7), 46, -44, ...e(14), 62, -60, 1, 1, 1, ...h(32), 8025, -6727, ...e(26), 64019, 0, -57327, 1, 57326],
59+
'koi8-r': [9345, 2, 10, 4, 4, 4, 4, 8, 8, 8, 8, 68, 4, 4, 4, 4, 1, 1, 1, -627, 640, -903, 1, 46, 28, 1, -8645, 8833, -8817, 2, 5, 64, 9305, 1, 1, -8449, 8450, ...e(14), -8544, 8545, ...e(10), -9411, 933, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3, -28, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3],
60+
'koi8-u': [9345, 2, 10, 4, 4, 4, 4, 8, 8, 8, 8, 68, 4, 4, 4, 4, 1, 1, 1, -627, 640, -903, 1, 46, 28, 1, -8645, 8833, -8817, 2, 5, 64, 9305, 1, 1, -8449, 3, 8448, -8446, 1, 8448, 1, 1, 1, 1, -8394, -51, 8448, 1, 1, 1, -8544, 3, 8543, -8541, 1, 8543, 1, 1, 1, 1, -8410, -130, -869, 933, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3, -28, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3],
61+
'macintosh': [69, 1, 2, 2, 8, 5, 6, 5, -1, 2, 2, -1, 2, 2, 2, -1, 2, 1, 2, -1, 2, 1, 2, 2, -1, 2, 2, -1, 5, -1, 2, 1, 7972, -8048, -14, 1, 4, 8059, -8044, 41, -49, -5, 8313, -8302, -12, 8632, -8602, 18, 8518, -8557, 8627, 1, -8640, 16, 8525, 15, -2, -7759, 7787, -8577, 16, 751, -707, 18, -57, -30, 11, 8558, -8328, 8374, -66, -8539, 16, 8043, -8070, 32, 3, 18, 125, 1, 7872, 1, 8, 1, -5, 1, -7970, 9427, -9419, 121, 7884, 104, -115, 1, 56007, 1, -56033, -8042, 8035, 4, 18, -8046, 8, -9, 10, -3, 5, 1, 1, -3, 7, 1, 63531, -63533, 8, 1, -2, 88, 405, 22, -557, 553, 1, 1, -546, 549, -2, -20],
62+
'windows-1250': [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104, -7888, 7897, -7903, 10, 25, -4, -233, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8129, 7897, -7903, 10, 25, -4, -218, 551, 17, -407, -157, 96, -94, 1, 1, 1, 181, -179, 1, 1, 1, 205, -203, 1, 554, -409, -142, 1, 1, 1, 1, 77, 90, -164, 130, 416, -415, 62, -40, -147, 1, 64, -62, 117, -51, -63, 69, -67, 79, -77, 79, -77, 1, 64, 2, 51, 4, -116, 1, 124, -122, 1, 129, 22, -148, 150, -148, 1, 133, -131, 118, -116, 1, 33, -31, 86, -51, -32, 38, -36, 48, -46, 48, -46, 1, 33, 2, 51, 4, -85, 1, 93, -91, 1, 98, 22, -117, 119, -117, 1, 102, 374],
63+
'windows-1251': [899, 1, 7191, -7111, 7115, 8, -6, 1, 139, -124, -7207, 7216, -7215, 2, -1, 4, 67, 7110, 1, 3, 1, 5, -15, 1, -8060, 8330, -7369, 7137, -7136, 2, -1, 4, -959, 878, 80, -86, -868, 1004, -1002, 1, 858, -856, 859, -857, 1, 1, 1, 857, -855, 1, 853, 80, 59, -988, 1, 1, 922, 7365, -7362, -921, 925, -83, 80, 2, -71, ...e(63)],
64+
'windows-1252': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -7888, 7897, -7911, -197, 240, -238, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8129, 7897, -7911, -182, 225, -6],
65+
'windows-1253': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -8089, 8104, -8102, 8111, -8109, 1, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8328, 8096, -8094, 1, 1, 1, 1, 741, 1, -739, 1, 1, 1, 1, 1, 1, r, 2, 1, 1, 1, 8039, -8037, 1, 1, 1, 721, -719, 1, 1, 721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), 64559],
66+
'windows-1254': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -7888, 7897, -7911, -197, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8129, 7897, -7911, -182, 1, 218, -216, ...e(47), 79, -77, ...e(11), 84, 46, -127, ...e(16), 48, -46, ...e(11), 53, 46],
67+
'windows-1255': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -8102, 8111, -8109, 1, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8328, 8096, -8094, ...e(7), 8199, -8197, 1, 1, 1, 1, 46, -44, ...e(14), 62, -60, 1, 1, 1, 1, 1265, ...e(19), 45, 1, 1, 1, 1, ...h(7), -36, ...e(26), 64019, 0, -57327, 1, 57326],
68+
'windows-1256': [8237, -6702, 6556, -7816, 7820, 8, -6, 1, -7515, 7530, -6583, 6592, -7911, 1332, 18, -16, 39, 6505, 1, 3, 1, 5, -15, 1, -6507, 6777, -6801, 6569, -7911, 7865, 1, -6483, -1562, 1388, -1386, ...e(7), 1557, -1555, ...e(14), 1378, -1376, 1, 1, 1, 1377, 162, -160, ...e(21), -1375, 1376, 1, 1, 1, 6, 1, 1, 1, -1379, 1380, -1378, 1379, 1, 1, 1, -1377, 1, 1, 1, 1, 1374, 1, -1372, 1, 1372, 1, 1, 1, -1370, 1371, 1, -1369, 1370, -1368, 1369, -1367, 1, 7954, 1, -6461],
69+
'windows-1257': [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104, -8102, 8111, -8109, 28, 543, -527, -40, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8328, 8096, -8094, 19, 556, -572, 1, r, 2, 1, 1, r, 2, 1, 49, -47, 173, -171, 1, 1, 1, 24, -22, ...e(5), 1, 1, 65, -63, 158, -156, 1, 1, 1, 40, 30, 42, -46, 6, -66, 1, 83, -6, -6, -67, 176, -99, 12, 20, -12, 17, 37, -29, 2, -114, 121, -119, 1, 1, 155, -49, 25, 16, -142, 159, 2, -158, 38, 42, -46, 6, -35, 1, 52, -6, -6, -36, 145, -99, 12, 20, -12, 17, 37, -29, 2, -83, 90, -88, 1, 1, 124, -49, 25, 16, -111, 128, 2, 347],
70+
'windows-1258': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -8102, 8111, -7911, -197, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8328, 8096, -7911, -182, 1, 218, -216, ...e(34), 64, -62, ...e(7), 565, -563, 1, 1, 65, -63, 568, -566, 1, 204, -202, 1, 1, 1, 1, 1, 1, 211, 340, -548, 1, 1, 1, 33, -31, ...e(7), 534, -532, 1, 1, 34, -32, 562, -560, 1, 173, -171, 1, 1, 1, 1, 1, 1, 180, 7931],
71+
'windows-874': [8237, -8235, 1, 1, 1, 8098, -8096, ...e(10), 8072, 1, 3, 1, 5, -15, 1, -8060, ...e(8), 3425, ...e(57), r, r, r, r, 5, ...e(28), r, r, r, r],
72+
'x-mac-cyrillic': [913, ...e(31), 7153, -8048, 992, -1005, 4, 8059, -8044, 848, -856, -5, 8313, -7456, 80, 7694, -7773, 80, 7627, -8557, 8627, 1, -7695, -929, 988, -137, -4, 80, -77, 80, -78, 80, -79, 80, -2, -83, -857, 8558, -8328, 8374, -66, -8539, 16, 8043, -8070, 875, 80, -79, 80, -7, 7102, 1, 8, 1, -5, 1, -7970, 7975, -7184, 80, -79, 80, 7351, -7445, 80, -2, -31, ...e(30), 7262],
73+
};
74+
75+
/* eslint-enable @stylistic/js/max-len */
76+
77+
/* fallback/single-byte.js + single-byte.node.js, simplified */
78+
79+
const l256 = { __proto__: null, length: 256 };
80+
81+
function getEncoding(encoding) {
82+
if (encoding === 'x-user-defined') {
83+
// https://encoding.spec.whatwg.org/#x-user-defined-decoder, 14.5.1. x-user-defined decoder
84+
return TypedArrayFrom(Uint16Array, l256, (_, i) => (i >= 0x80 ? 0xf700 + i : i));
85+
}
86+
87+
if (!ObjectPrototypeHasOwnProperty(encodings, encoding)) {
88+
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
89+
}
90+
91+
const map = TypedArrayFrom(Uint16Array, l256, (_, i) => i); // Unicode subset
92+
let prev = 127;
93+
map.set(TypedArrayFrom(Uint16Array, it(encodings[encoding]), (x) => (x === r ? x : (prev += x))), 128);
94+
return map;
95+
}
96+
97+
const supported = new SafeSet(it(ObjectKeys(encodings))).add('iso-8859-8-i').add('x-user-defined');
98+
const isSinglebyteEncoding = (enc) => supported.has(enc);
99+
100+
const decodersLoose = new SafeMap();
101+
const decodersFatal = new SafeMap();
102+
103+
/**
 * Returns a decoder function for a single-byte encoding, creating and caching
 * it on first use.
 *
 * Decoders are cached per requested encoding name and per `fatal` mode.
 * 'iso-8859-8-i' decodes with the 'iso-8859-8' table but gets its own cache
 * entry, because each decoder reports the name it was created for when it
 * throws.
 * @param {string} encoding Single-byte encoding name.
 * @param {boolean} fatal When truthy, the decoder throws on bytes that have
 *   no mapping instead of emitting U+FFFD.
 * @returns {(buf: Buffer) => string} Decoder expecting a type-checked Buffer.
 * @throws {ERR_ENCODING_NOT_SUPPORTED} Propagated from `getEncoding()` when
 *   there is no table for the encoding.
 */
function createSinglebyteDecoder(encoding, fatal) {
  // The table is looked up by the shared id ...
  const id = encoding === 'iso-8859-8-i' ? 'iso-8859-8' : encoding;
  const decoders = fatal ? decodersFatal : decodersLoose;
  // ... but the cache is keyed by the requested name: the closure below
  // captures `encoding` for its error, so sharing one decoder between aliases
  // would report whichever alias happened to be created first.
  const cached = decoders.get(encoding);
  if (cached) return cached;

  const map = getEncoding(id);
  // Only tables with holes can ever produce U+FFFD, so only those need the
  // post-decode scan in fatal mode.
  const incomplete = map.includes(r);
  if (isBigEndian === undefined) isBigEndian = internalBinding('os').isBigEndian;

  // Expects type-checked Buffer input
  const decoder = (buf) => {
    if (buf.byteLength === 0) return '';
    if (isAscii(buf)) return buf.latin1Slice(); // .latin1Slice is faster than .asciiSlice
    const o = TypedArrayFrom(Uint16Array, buf); // Copy to modify in-place, also those are 16-bit now

    // Map eight code units per iteration, then finish the remainder below.
    let i = 0;
    for (const end7 = o.length - 7; i < end7; i += 8) {
      o[i] = map[o[i]];
      o[i + 1] = map[o[i + 1]];
      o[i + 2] = map[o[i + 2]];
      o[i + 3] = map[o[i + 3]];
      o[i + 4] = map[o[i + 4]];
      o[i + 5] = map[o[i + 5]];
      o[i + 6] = map[o[i + 6]];
      o[i + 7] = map[o[i + 7]];
    }

    for (const end = o.length; i < end; i++) o[i] = map[o[i]];

    // The Uint16Array is in platform byte order while 'ucs2' is little-endian,
    // so the bytes are swapped first on big-endian hosts.
    const b = Buffer.from(o.buffer, o.byteOffset, o.byteLength);
    if (isBigEndian) b.swap16();
    const string = b.ucs2Slice();
    if (fatal && incomplete && StringPrototypeIncludes(string, '\uFFFD')) {
      throw new ERR_ENCODING_INVALID_ENCODED_DATA(encoding, undefined);
    }
    return string;
  };

  decoders.set(encoding, decoder);
  return decoder;
}
145+
146+
module.exports = {
147+
isSinglebyteEncoding,
148+
createSinglebyteDecoder,
149+
getEncoding, // for tests
150+
};

0 commit comments

Comments
 (0)