Skip to content

Commit d49cc98

Browse files
committed
fix: implement all 1-byte encodings in js
1 parent 2e597de commit d49cc98

File tree

6 files changed

+190
-160
lines changed

6 files changed

+190
-160
lines changed

lib/internal/encoding.js

Lines changed: 40 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,17 @@ const {
2222
ERR_INVALID_THIS,
2323
ERR_NO_ICU,
2424
} = require('internal/errors').codes;
25+
const kMethod = Symbol('method');
2526
const kHandle = Symbol('handle');
2627
const kFlags = Symbol('flags');
2728
const kEncoding = Symbol('encoding');
2829
const kDecoder = Symbol('decoder');
2930
const kFatal = Symbol('kFatal');
3031
const kUTF8FastPath = Symbol('kUTF8FastPath');
31-
const kWindows1252FastPath = Symbol('kWindows1252FastPath');
3232
const kIgnoreBOM = Symbol('kIgnoreBOM');
3333

34+
const { isSinglebyteEncoding, createSinglebyteDecoder } = require('internal/encoding/single-byte');
35+
3436
const {
3537
getConstructorOf,
3638
customInspectSymbol: inspect,
@@ -55,7 +57,6 @@ const {
5557
encodeIntoResults,
5658
encodeUtf8String,
5759
decodeUTF8,
58-
decodeWindows1252,
5960
} = binding;
6061

6162
const { Buffer } = require('buffer');
@@ -69,7 +70,7 @@ const CONVERTER_FLAGS_FLUSH = 0x1;
6970
const CONVERTER_FLAGS_FATAL = 0x2;
7071
const CONVERTER_FLAGS_IGNORE_BOM = 0x4;
7172

72-
const empty = new Uint8Array(0);
73+
const empty = Buffer.alloc(0);
7374

7475
const encodings = new SafeMap([
7576
['unicode-1-1-utf-8', 'utf-8'],
@@ -387,6 +388,27 @@ ObjectDefineProperties(
387388
[SymbolToStringTag]: { __proto__: null, configurable: true, value: 'TextEncoder' },
388389
});
389390

391+
/**
 * Normalizes the `input` argument of `TextDecoder.prototype.decode()` to a
 * Buffer.
 *
 * ArrayBuffer-like inputs are passed to `Buffer.from(input)`; views are passed
 * as `Buffer.from(input.buffer, input.byteOffset, input.byteLength)`. If that
 * call throws, the shared `empty` buffer is returned instead of propagating
 * the error.
 * @param {ArrayBuffer|SharedArrayBuffer|ArrayBufferView} input
 * @returns {Buffer}
 * @throws {ERR_INVALID_ARG_TYPE} When `input` is neither an (any) ArrayBuffer
 *   nor an ArrayBufferView.
 */
function parseInput(input) {
  const isWholeBuffer = isAnyArrayBuffer(input);

  // Reject unsupported types up front; this check sits outside the try/catch
  // so the type error is never swallowed.
  if (!isWholeBuffer && !isArrayBufferView(input)) {
    throw new ERR_INVALID_ARG_TYPE('input',
                                   ['ArrayBuffer', 'ArrayBufferView'],
                                   input);
  }

  try {
    if (isWholeBuffer) {
      return Buffer.from(input);
    }
    return Buffer.from(input.buffer, input.byteOffset, input.byteLength);
  } catch {
    // Best effort: an input that cannot be wrapped decodes as empty.
    return empty;
  }
}
411+
390412
const TextDecoder =
391413
internalBinding('config').hasIntl ?
392414
makeTextDecoderICU() :
@@ -420,10 +442,12 @@ function makeTextDecoderICU() {
420442
this[kFatal] = Boolean(options?.fatal);
421443
// Only support fast path for UTF-8.
422444
this[kUTF8FastPath] = enc === 'utf-8';
423-
this[kWindows1252FastPath] = enc === 'windows-1252';
424445
this[kHandle] = undefined;
446+
this[kMethod] = undefined;
425447

426-
if (!this[kUTF8FastPath] && !this[kWindows1252FastPath]) {
448+
if (isSinglebyteEncoding(this.encoding)) {
449+
this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]);
450+
} else if (!this[kUTF8FastPath]) {
427451
this.#prepareConverter();
428452
}
429453
}
@@ -438,22 +462,18 @@ function makeTextDecoderICU() {
438462

439463
decode(input = empty, options = kEmptyObject) {
440464
validateDecoder(this);
465+
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
466+
467+
if (this[kMethod]) return this[kMethod](parseInput(input));
441468

442469
this[kUTF8FastPath] &&= !(options?.stream);
443-
this[kWindows1252FastPath] &&= !(options?.stream);
444470

445471
if (this[kUTF8FastPath]) {
446472
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
447473
}
448474

449-
if (this[kWindows1252FastPath]) {
450-
return decodeWindows1252(input, this[kIgnoreBOM], this[kFatal]);
451-
}
452-
453475
this.#prepareConverter();
454476

455-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
456-
457477
let flags = 0;
458478
if (options !== null)
459479
flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH;
@@ -502,30 +522,20 @@ function makeTextDecoderJS() {
502522
this[kFlags] = flags;
503523
this[kEncoding] = enc;
504524
this[kBOMSeen] = false;
525+
this[kMethod] = undefined;
526+
527+
if (isSinglebyteEncoding(this.encoding)) {
528+
this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]);
529+
}
505530
}
506531

507532
decode(input = empty, options = kEmptyObject) {
508533
validateDecoder(this);
509-
if (isAnyArrayBuffer(input)) {
510-
try {
511-
input = Buffer.from(input);
512-
} catch {
513-
input = empty;
514-
}
515-
} else if (isArrayBufferView(input)) {
516-
try {
517-
input = Buffer.from(input.buffer, input.byteOffset,
518-
input.byteLength);
519-
} catch {
520-
input = empty;
521-
}
522-
} else {
523-
throw new ERR_INVALID_ARG_TYPE('input',
524-
['ArrayBuffer', 'ArrayBufferView'],
525-
input);
526-
}
534+
input = parseInput(input);
527535
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
528536

537+
if (this[kMethod]) return this[kMethod](input);
538+
529539
if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
530540
this[kBOMSeen] = false;
531541
}
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
// Simplified version extracted from https://npmjs.com/package/@exodus/bytes codepath for 1-byte encodings
2+
// Copyright Exodus Movement. Licensed under MIT License.
3+
4+
'use strict';
5+
6+
const {
7+
ArrayFrom,
8+
ObjectKeys,
9+
ObjectPrototypeHasOwnProperty,
10+
SafeMap,
11+
SafeSet,
12+
StringPrototypeIncludes,
13+
TypedArrayFrom,
14+
} = primordials;
15+
16+
const { Buffer, isAscii } = require('buffer');
17+
18+
const {
19+
ERR_ENCODING_NOT_SUPPORTED,
20+
ERR_ENCODING_INVALID_ENCODED_DATA,
21+
} = require('internal/errors').codes;
22+
23+
const { isBigEndian } = internalBinding('os');
24+
25+
/* fallback/single-byte.encodings.js, prepared differently */
26+
27+
const e = (length) => ArrayFrom({ length }, () => 1);
28+
const z = (length) => ArrayFrom({ length }, () => 0);
29+
30+
/* eslint-disable @stylistic/js/max-len */
31+
32+
// Common ranges
33+
34+
const k8a = [9472, 2, 10, 4, 4, 4, 4, 8, 8, 8, 8, 68, 4, 4, 4, 4, 1, 1, 1, -627, 640, -903, 1, 46, 28, 1, -8645, 8833, -8817, 2, 5, 64, 9305, 1, 1, -8449];
35+
const k8b = [-30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3];
36+
const p1 = [8364, -8235, 8089, -7816, 7820, 8, -6, 1];
37+
const p2 = [-99, 12, 20, -12, 17, 37, -29, 2];
38+
const p3 = [1, 1, 65, -63, 158, -156, 1, 1, 1, 40, 30, 42, -46, 6, -66, 1, 83, -6, -6, -67, 176, ...p2, -114, 121, -119, 1, 1, 155, -49, 25, 16, -142, 159, 2, -158, 38, 42, -46, 6, -35, 1, 52, -6, -6, -36, 145, ...p2, -83, 90, -88, 1, 1, 124, -49, 25, 16, -111, 128, 2];
39+
const i0 = [128, ...e(32)];
40+
const i2 = [-40, -147, 1, 64, -62, 117, -51, -63, 69, -67, 79, -77, 79, -77, 1, 64, 2, 51, 4, -116, 1, 124, -122, 1, 129, 22, -148, 150, -148, 1, 133, -131, 118, -116, 1, 33, -31, 86, -51, -32, 38, -36, 48, -46, 48, -46, 1, 33, 2, 51, 4, -85, 1, 93, -91, 1, 98, 22, -117, 119, -117, 1, 102, 374];
41+
const i4a = [-75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1];
42+
const i4b = [34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1];
43+
const i7 = [721, 1, 1, -719, 721, -719, 721, ...e(19), 64604, -64602, ...e(43), 64559];
44+
const i8 = [...e(26), 64019, 0, -57327, 1, 57326];
45+
const w0 = [8364, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104];
46+
const w8 = [8072, 1, 3, 1, 5, -15, 1];
47+
const w1 = [...w8, -7480, 7750, -8129, 7897, -7911, -182];
48+
const w3 = [...w8, -8060, 8330, -8328, 8096, -8094];
49+
const m0 = [8558, -8328, 8374, -66, -8539, 16, 8043, -8070];
50+
51+
const encodings = {
52+
'__proto__': null,
53+
'ibm866': [1040, ...e(47), 8530, 1, 1, -145, 34, 61, 1, -12, -1, 14, -18, 6, 6, -1, -1, -75, 4, 32, -8, -16, -28, 60, 34, 1, -5, -6, 21, -3, -6, -16, 28, -5, 1, -4, 1, -12, -1, -6, 1, 24, -1, -82, -12, 124, -4, 8, 4, -16, -8512, ...e(15), -78, 80, -77, 80, -77, 80, -73, 80, -942, 8553, -8546, 8547, -260, -8306, 9468, -9472],
54+
'iso-8859-10': [...i0, 100, 14, 16, 8, -2, 14, -143, 148, -43, 80, 6, 23, -208, 189, -32, -154, 85, 14, 16, 8, -2, 14, -128, 133, -43, 80, 6, 23, 7831, -7850, -32, ...i4a, 1, 1, 117, 7, -121, 1, 1, 1, 146, -144, 154, -152, ...e(5), ...i4b, 1, 1, 86, 7, -90, 1, 1, 1, 115, -113, 123, -121, 1, 1, 1, 1, 58],
55+
'iso-8859-13': [...i0, 8061, -8059, 1, 1, 8058, -8056, 1, 49, -47, 173, -171, 1, 1, 1, 24, -22, 1, 1, 1, 8041, -8039, ...p3, 7835],
56+
'iso-8859-14': [...i0, 7522, 1, -7520, 103, 1, 7423, -7523, 7641, -7639, 7641, -119, 231, -7749, 1, 202, 7334, 1, -7423, 1, 7455, 1, -7563, 7584, 43, -42, 44, -35, 147, -111, 1, -36, -7585, ...e(15), 165, -163, ...e(5), 7572, -7570, ...e(5), 153, -151, ...e(16), 134, -132, ...e(5), 7541, -7539, ...e(5), 122],
57+
'iso-8859-15': [...i0, 1, 1, 1, 8201, -8199, 187, -185, 186, -184, ...e(10), 202, -200, 1, 1, 199, -197, 1, 1, 151, 1, 37],
58+
'iso-8859-16': [...i0, 100, 1, 60, 8043, -142, -7870, -185, 186, -184, 367, -365, 206, -204, 205, 1, -203, 1, 91, 54, 59, 7840, -8039, 1, 199, -113, 268, -350, 151, 1, 37, 4, -188, 1, 1, 64, -62, 66, -64, ...e(9), 65, 51, -113, 1, 1, 124, -122, 132, 22, -151, 1, 1, 1, 60, 258, -315, 1, 1, 1, 33, -31, 35, -33, ...e(9), 34, 51, -82, 1, 1, 93, -91, 101, 22, -120, 1, 1, 1, 29, 258],
59+
'iso-8859-2': [...i0, 100, 468, -407, -157, 153, 29, -179, 1, 184, -2, 6, 21, -204, 208, -2, -203, 85, 470, -409, -142, 138, 29, 364, -527, 169, -2, 6, 21, 355, -351, -2, ...i2],
60+
'iso-8859-3': [...i0, 134, 434, -565, 1, 65369, -65241, -125, 1, 136, 46, -64, 22, -135, 65360, -65154, -203, 119, -117, 1, 1, 1, 112, -110, 1, 121, 46, -64, 22, -120, 65344, -65153, -188, 1, 1, 65339, -65337, 70, -2, -65, ...e(8), 65326, -65324, 1, 1, 1, 76, -74, 1, 69, -67, 1, 1, 1, 144, -16, -125, 1, 1, 1, 65307, -65305, 39, -2, -34, ...e(8), 65294, -65292, 1, 1, 1, 45, -43, 1, 38, -36, 1, 1, 1, 113, -16, 380],
61+
'iso-8859-4': [...i0, 100, 52, 30, -178, 132, 19, -148, 1, 184, -78, 16, 68, -185, 208, -206, 1, 85, 470, -388, -163, 117, 19, 395, -527, 169, -78, 16, 68, -29, 52, -51, ...i4a, 92, -26, 53, 7, -22, -98, 1, 1, 1, 1, 154, -152, 1, 1, 140, 2, -139, ...i4b, 61, -26, 53, 7, -22, -67, 1, 1, 1, 1, 123, -121, 1, 1, 109, 2, 366],
62+
'iso-8859-5': [...i0, 865, ...e(11), -863, 865, ...e(65), 7367, -7365, ...e(11), -949, 951, 1],
63+
'iso-8859-6': [...i0, 65373, 0, 0, -65369, 65369, ...z(6), -63985, -1375, 65360, ...z(12), -63970, 63970, 0, 0, -63966, 63966, -63964, ...e(25), 63939, 0, 0, 0, 0, -63933, ...e(18), 63915, ...z(12)],
64+
'iso-8859-7': [...i0, 8056, 1, -8054, 8201, 3, -8201, 1, 1, 1, 721, -719, 1, 1, 65360, -57320, -8037, 1, 1, 1, 721, 1, 1, -719, ...i7],
65+
'iso-8859-8': [...i0, 65373, -65371, ...e(7), 46, -44, ...e(14), 62, -60, 1, 1, 1, 65343, ...z(31), -57318, -6727, ...i8],
66+
'koi8-r': [...k8a, 8450, ...e(14), -8544, 8545, ...e(10), -9411, 933, ...k8b, -28, ...k8b],
67+
'koi8-u': [...k8a, 3, 8448, -8446, 1, 8448, 1, 1, 1, 1, -8394, -51, 8448, 1, 1, 1, -8544, 3, 8543, -8541, 1, 8543, 1, 1, 1, 1, -8410, -130, -869, 933, ...k8b, -28, ...k8b],
68+
'macintosh': [196, 1, 2, 2, 8, 5, 6, 5, -1, 2, 2, -1, 2, 2, 2, -1, 2, 1, 2, -1, 2, 1, 2, 2, -1, 2, 2, -1, 5, -1, 2, 1, 7972, -8048, -14, 1, 4, 8059, -8044, 41, -49, -5, 8313, -8302, -12, 8632, -8602, 18, 8518, -8557, 8627, 1, -8640, 16, 8525, 15, -2, -7759, 7787, -8577, 16, 751, -707, 18, -57, -30, 11, ...m0, 32, 3, 18, 125, 1, 7872, 1, 8, 1, -5, 1, -7970, 9427, -9419, 121, 7884, 104, -115, 1, 56007, 1, -56033, -8042, 8035, 4, 18, -8046, 8, -9, 10, -3, 5, 1, 1, -3, 7, 1, 63531, -63533, 8, 1, -2, 88, 405, 22, -557, 553, 1, 1, -546, 549, -2, -20],
69+
'windows-1250': [...w0, -7888, 7897, -7903, 10, 25, -4, -233, ...w8, -8060, 8330, -8129, 7897, -7903, 10, 25, -4, -218, 551, 17, -407, -157, 96, -94, 1, 1, 1, 181, -179, 1, 1, 1, 205, -203, 1, 554, -409, -142, 1, 1, 1, 1, 77, 90, -164, 130, 416, -415, 62, ...i2],
70+
'windows-1251': [1026, 1, 7191, -7111, 7115, 8, -6, 1, 139, -124, -7207, 7216, -7215, 2, -1, 4, 67, 7110, 1, 3, 1, 5, -15, 1, -8060, 8330, -7369, 7137, -7136, 2, -1, 4, -959, 878, 80, -86, -868, 1004, -1002, 1, 858, -856, 859, -857, 1, 1, 1, 857, -855, 1, 853, 80, 59, -988, 1, 1, 922, 7365, -7362, -921, 925, -83, 80, 2, -71, ...e(63)],
71+
'windows-1252': [...p1, -7515, 7530, -7888, 7897, -7911, -197, 240, -238, 1, ...w1, 225, -6],
72+
'windows-1253': [...p1, -8089, 8104, -8102, 8111, -8109, 1, 1, 1, 1, ...w3, 1, 1, 1, 1, 741, 1, -739, 1, 1, 1, 1, 1, 1, 65364, -65362, 1, 1, 1, 8039, -8037, 1, 1, 1, 721, -719, 1, 1, ...i7],
73+
'windows-1254': [...p1, -7515, 7530, -7888, 7897, -7911, -197, 1, 1, 1, ...w1, 1, 218, -216, ...e(47), 79, -77, ...e(11), 84, 46, -127, ...e(16), 48, -46, ...e(11), 53, 46],
74+
'windows-1255': [...p1, -7515, 7530, -8102, 8111, -8109, 1, 1, 1, 1, ...w8, -7480, 7750, -8328, 8096, -8094, ...e(7), 8199, -8197, 1, 1, 1, 1, 46, -44, ...e(14), 62, -60, 1, 1, 1, 1, 1265, ...e(19), 45, 1, 1, 1, 1, 64009, ...z(6), -64045, ...i8],
75+
'windows-1256': [8364, -6702, 6556, -7816, 7820, 8, -6, 1, -7515, 7530, -6583, 6592, -7911, 1332, 18, -16, 39, 6505, 1, 3, 1, 5, -15, 1, -6507, 6777, -6801, 6569, -7911, 7865, 1, -6483, -1562, 1388, -1386, ...e(7), 1557, -1555, ...e(14), 1378, -1376, 1, 1, 1, 1377, 162, -160, ...e(21), -1375, 1376, 1, 1, 1, 6, 1, 1, 1, -1379, 1380, -1378, 1379, 1, 1, 1, -1377, 1, 1, 1, 1, 1374, 1, -1372, 1, 1372, 1, 1, 1, -1370, 1371, 1, -1369, 1370, -1368, 1369, -1367, 1, 7954, 1, -6461],
76+
'windows-1257': [...w0, -8102, 8111, -8109, 28, 543, -527, -40, ...w3, 19, 556, -572, 1, 65373, -65371, 1, 1, 65369, -65367, 1, 49, -47, 173, -171, 1, 1, 1, 24, -22, ...e(5), ...p3, 347],
77+
'windows-1258': [...p1, -7515, 7530, -8102, 8111, -7911, -197, 1, 1, 1, ...w8, -7480, 7750, -8328, 8096, -7911, -182, 1, 218, -216, ...e(34), 64, -62, ...e(7), 565, -563, 1, 1, 65, -63, 568, -566, 1, 204, -202, 1, 1, 1, 1, 1, 1, 211, 340, -548, 1, 1, 1, 33, -31, ...e(7), 534, -532, 1, 1, 34, -32, 562, -560, 1, 173, -171, 1, 1, 1, 1, 1, 1, 180, 7931],
78+
'windows-874': [8364, -8235, 1, 1, 1, 8098, -8096, ...e(10), ...w8, -8060, ...e(8), 3425, ...e(57), 61891, 0, 0, 0, -61886, ...e(28), 61858, 0, 0, 0],
79+
'x-mac-cyrillic': [1040, ...e(31), 7153, -8048, 992, -1005, 4, 8059, -8044, 848, -856, -5, 8313, -7456, 80, 7694, -7773, 80, 7627, -8557, 8627, 1, -7695, -929, 988, -137, -4, 80, -77, 80, -78, 80, -79, 80, -2, -83, -857, ...m0, 875, 80, -79, 80, -7, 7102, 1, 8, 1, -5, 1, -7970, 7975, -7184, 80, -79, 80, 7351, -7445, 80, -2, -31, ...e(30), 7262],
80+
};
81+
82+
/* eslint-enable @stylistic/js/max-len */
83+
84+
/* fallback/single-byte.js + /single-byte.node.js, simplified */
85+
86+
/**
 * Builds the 256-entry byte -> UTF-16 code unit table for a single-byte
 * encoding.
 *
 * Bytes 0x00-0x7F always map to themselves. For the upper half, the
 * `encodings` table stores each entry as the difference from the previous
 * code unit (starting from 0), so the values have to be accumulated before
 * they are written into the map. Tables shorter than 128 entries leave the
 * remaining bytes mapped to themselves.
 * @param {string} encoding Key of `encodings`, or 'x-user-defined'.
 * @returns {Uint16Array} Lookup table indexed by byte value.
 * @throws {ERR_ENCODING_NOT_SUPPORTED} When `encoding` has no table.
 */
function getEncoding(encoding) {
  if (encoding === 'x-user-defined') {
    // Upper half is offset into the U+F780..U+F7FF range.
    return TypedArrayFrom(Uint16Array, { length: 256 }, (_, i) => (i >= 0x80 ? 0xf700 + i : i));
  }

  if (!ObjectPrototypeHasOwnProperty(encodings, encoding)) {
    throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
  }

  const map = TypedArrayFrom(Uint16Array, { length: 256 }, (_, i) => i); // Unicode subset

  // Undo the delta encoding: each stored value is relative to the previous
  // code unit. Writing the raw deltas would wrap negative values in the
  // Uint16Array and produce a bogus table.
  const deltas = encodings[encoding];
  let codeUnit = 0;
  for (let i = 0; i < deltas.length; i++) {
    codeUnit += deltas[i];
    map[128 + i] = codeUnit;
  }
  return map;
}
99+
100+
const supported = new SafeSet([...ObjectKeys(encodings), 'iso-8859-8-i', 'x-user-defined']);
101+
const isSinglebyteEncoding = (enc) => supported.has(enc);
102+
103+
const decoders = new SafeMap();
104+
105+
/**
 * Returns a function that decodes a Buffer in the given single-byte encoding.
 *
 * Decoders are cached per (encoding label, fatal) pair. The returned closure
 * captures both `encoding` (for the error message) and `fatal`, so caching on
 * the encoding alone would hand a decoder created for one `fatal` setting to
 * callers that asked for the other.
 * @param {string} encoding Encoding label accepted by `isSinglebyteEncoding`.
 * @param {boolean} fatal When truthy, decoding throws if the output contains
 *   U+FFFD and the encoding's table maps at least one byte to U+FFFD.
 * @returns {(buf: Buffer) => string} Decoder; expects an already type-checked
 *   Buffer.
 * @throws {ERR_ENCODING_NOT_SUPPORTED} Propagated from `getEncoding` when the
 *   encoding has no table.
 */
function createSinglebyteDecoder(encoding, fatal) {
  const strict = Boolean(fatal);
  const cacheKey = `${encoding}:${strict ? 'fatal' : 'replacement'}`;
  const cached = decoders.get(cacheKey);
  if (cached) return cached;

  // 'iso-8859-8-i' uses the same table as 'iso-8859-8'.
  const map = getEncoding(encoding === 'iso-8859-8-i' ? 'iso-8859-8' : encoding);
  // Only tables that contain U+FFFD can produce it, so the post-decode scan
  // is skipped for every other encoding.
  const incomplete = map.includes(0xfffd);

  // Expects a type-checked Buffer input
  const decoder = (buf) => {
    if (buf.byteLength === 0) return '';
    if (isAscii(buf)) return buf.latin1Slice(); // .latin1Slice is faster than .asciiSlice
    // Widen to 16-bit code units in a fresh copy that is then remapped in place.
    const o = TypedArrayFrom(Uint16Array, buf);

    // Remap eight code units per iteration, then finish the tail one by one.
    let i = 0;
    for (const end7 = o.length - 7; i < end7; i += 8) {
      o[i] = map[o[i]];
      o[i + 1] = map[o[i + 1]];
      o[i + 2] = map[o[i + 2]];
      o[i + 3] = map[o[i + 3]];
      o[i + 4] = map[o[i + 4]];
      o[i + 5] = map[o[i + 5]];
      o[i + 6] = map[o[i + 6]];
      o[i + 7] = map[o[i + 7]];
    }

    for (const end = o.length; i < end; i++) o[i] = map[o[i]];

    const b = Buffer.from(o.buffer, o.byteOffset, o.byteLength);
    // On big-endian hosts the 16-bit units are byte-swapped before being read
    // back as a UCS-2 string.
    if (isBigEndian) b.swap16();
    const string = b.ucs2Slice(0, b.byteLength);
    if (strict && incomplete && StringPrototypeIncludes(string, '\uFFFD')) {
      throw new ERR_ENCODING_INVALID_ENCODED_DATA(`The encoded data was not valid for encoding ${encoding}`);
    }
    return string;
  };

  decoders.set(cacheKey, decoder);
  return decoder;
}
145+
146+
module.exports = {
147+
isSinglebyteEncoding,
148+
createSinglebyteDecoder,
149+
getEncoding, // for tests
150+
};

src/encoding_binding.cc

Lines changed: 0 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#include "encoding_binding.h"
22
#include "ada.h"
33
#include "env-inl.h"
4-
#include "node_buffer.h"
54
#include "node_errors.h"
65
#include "node_external_reference.h"
76
#include "simdutf.h"
@@ -414,8 +413,6 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
414413
SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8);
415414
SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII);
416415
SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode);
417-
SetMethodNoSideEffect(
418-
isolate, target, "decodeWindows1252", DecodeWindows1252);
419416
}
420417

421418
void BindingData::CreatePerContextProperties(Local<Object> target,
@@ -433,77 +430,6 @@ void BindingData::RegisterTimerExternalReferences(
433430
registry->Register(DecodeUTF8);
434431
registry->Register(ToASCII);
435432
registry->Register(ToUnicode);
436-
registry->Register(DecodeWindows1252);
437-
}
438-
439-
void BindingData::DecodeWindows1252(const FunctionCallbackInfo<Value>& args) {
440-
Environment* env = Environment::GetCurrent(args);
441-
442-
CHECK_GE(args.Length(), 1);
443-
if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
444-
args[0]->IsArrayBufferView())) {
445-
return node::THROW_ERR_INVALID_ARG_TYPE(
446-
env->isolate(),
447-
"The \"input\" argument must be an instance of ArrayBuffer, "
448-
"SharedArrayBuffer, or ArrayBufferView.");
449-
}
450-
451-
bool ignore_bom = args[1]->IsTrue();
452-
453-
ArrayBufferViewContents<uint8_t> buffer(args[0]);
454-
const uint8_t* data = buffer.data();
455-
size_t length = buffer.length();
456-
457-
if (ignore_bom && length > 0 && data[0] == 0xFF) {
458-
data++;
459-
length--;
460-
}
461-
462-
if (length == 0) {
463-
return args.GetReturnValue().SetEmptyString();
464-
}
465-
466-
// Windows-1252 specific mapping for bytes 128-159
467-
// These differ from Latin-1/ISO-8859-1
468-
static const uint16_t windows1252_mapping[32] = {
469-
0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
470-
0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
471-
0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
472-
0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178 // 98-9F
473-
};
474-
475-
std::string result;
476-
result.reserve(length * 3); // Reserve space for UTF-8 output
477-
478-
for (size_t i = 0; i < length; i++) {
479-
uint8_t byte = data[i];
480-
uint32_t codepoint;
481-
482-
// Check if byte is in the special Windows-1252 range (128-159)
483-
if (byte >= 0x80 && byte <= 0x9F) {
484-
codepoint = windows1252_mapping[byte - 0x80];
485-
} else {
486-
// For all other bytes, Windows-1252 is identical to Latin-1
487-
codepoint = byte;
488-
}
489-
490-
// Convert codepoint to UTF-8
491-
if (codepoint < 0x80) {
492-
result.push_back(static_cast<char>(codepoint));
493-
} else if (codepoint < 0x800) {
494-
result.push_back(static_cast<char>(0xC0 | (codepoint >> 6)));
495-
result.push_back(static_cast<char>(0x80 | (codepoint & 0x3F)));
496-
} else {
497-
result.push_back(static_cast<char>(0xE0 | (codepoint >> 12)));
498-
result.push_back(static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F)));
499-
result.push_back(static_cast<char>(0x80 | (codepoint & 0x3F)));
500-
}
501-
}
502-
503-
Local<Value> ret;
504-
if (ToV8Value(env->context(), result, env->isolate()).ToLocal(&ret)) {
505-
args.GetReturnValue().Set(ret);
506-
}
507433
}
508434

509435
} // namespace encoding_binding

src/encoding_binding.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,6 @@ class BindingData : public SnapshotableObject {
3131
static void EncodeInto(const v8::FunctionCallbackInfo<v8::Value>& args);
3232
static void EncodeUtf8String(const v8::FunctionCallbackInfo<v8::Value>& args);
3333
static void DecodeUTF8(const v8::FunctionCallbackInfo<v8::Value>& args);
34-
static void DecodeWindows1252(
35-
const v8::FunctionCallbackInfo<v8::Value>& args);
3634

3735
static void ToASCII(const v8::FunctionCallbackInfo<v8::Value>& args);
3836
static void ToUnicode(const v8::FunctionCallbackInfo<v8::Value>& args);

0 commit comments

Comments
 (0)