|
| 1 | +// Simplified version extracted from https://npmjs.com/package/@exodus/bytes codepath for 1-byte encodings |
| 2 | +// Copyright Exodus Movement. Licensed under MIT License. |
| 3 | + |
| 4 | +'use strict'; |
| 5 | + |
| 6 | +const { |
| 7 | + ArrayFrom, |
| 8 | + ObjectKeys, |
| 9 | + ObjectPrototypeHasOwnProperty, |
| 10 | + SafeMap, |
| 11 | + SafeSet, |
| 12 | + StringPrototypeIncludes, |
| 13 | + TypedArrayFrom, |
| 14 | +} = primordials; |
| 15 | + |
| 16 | +const { Buffer, isAscii } = require('buffer'); |
| 17 | + |
| 18 | +const { |
| 19 | + ERR_ENCODING_NOT_SUPPORTED, |
| 20 | + ERR_ENCODING_INVALID_ENCODED_DATA, |
| 21 | +} = require('internal/errors').codes; |
| 22 | + |
| 23 | +const { isBigEndian } = internalBinding('os'); |
| 24 | + |
| 25 | +/* fallback/single-byte.encodings.js, prepared differently */ |
| 26 | + |
// Run helpers for the delta tables below: `e(n)` yields n ones (n consecutive code units),
// `z(n)` yields n zeros (the previous code unit repeated n more times).
const filledWith = (value) => (length) => ArrayFrom({ length }, () => value);
const e = filledWith(1);
const z = filledWith(0);
| 29 | + |
| 30 | +/* eslint-disable @stylistic/js/max-len */ |
| 31 | + |
// Common ranges
//
// Fragments shared by several tables in `encodings`, spliced in with spread syntax.
// Like the tables themselves, they hold differences between consecutive code units,
// not absolute code points.

// k8a / k8b: used by both koi8-r and koi8-u (k8b appears twice in each)
const k8a = [9472, 2, 10, 4, 4, 4, 4, 8, 8, 8, 8, 68, 4, 4, 4, 4, 1, 1, 1, -627, 640, -903, 1, 46, 28, 1, -8645, 8833, -8817, 2, 5, 64, 9305, 1, 1, -8449];
const k8b = [-30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3];
// p1: opening run of several windows-125x tables; p2 is nested twice inside p3
const p1 = [8364, -8235, 8089, -7816, 7820, 8, -6, 1];
const p2 = [-99, 12, 20, -12, 17, 37, -29, 2];
const p3 = [1, 1, 65, -63, 158, -156, 1, 1, 1, 40, 30, 42, -46, 6, -66, 1, 83, -6, -6, -67, 176, ...p2, -114, 121, -119, 1, 1, 155, -49, 25, 16, -142, 159, 2, -158, 38, 42, -46, 6, -35, 1, 52, -6, -6, -36, 145, ...p2, -83, 90, -88, 1, 1, 124, -49, 25, 16, -111, 128, 2];
// i0: opens every iso-8859-* table; sums to 128, 129, ..., 160 (33 entries)
const i0 = [128, ...e(32)];
const i2 = [-40, -147, 1, 64, -62, 117, -51, -63, 69, -67, 79, -77, 79, -77, 1, 64, 2, 51, 4, -116, 1, 124, -122, 1, 129, 22, -148, 150, -148, 1, 133, -131, 118, -116, 1, 33, -31, 86, -51, -32, 38, -36, 48, -46, 48, -46, 1, 33, 2, 51, 4, -85, 1, 93, -91, 1, 98, 22, -117, 119, -117, 1, 102, 374];
const i4a = [-75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1];
const i4b = [34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1];
const i7 = [721, 1, 1, -719, 721, -719, 721, ...e(19), 64604, -64602, ...e(43), 64559];
const i8 = [...e(26), 64019, 0, -57327, 1, 57326];
// w8 is nested inside w1 and w3 and also used directly by several windows-* tables
const w0 = [8364, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104];
const w8 = [8072, 1, 3, 1, 5, -15, 1];
const w1 = [...w8, -7480, 7750, -8129, 7897, -7911, -182];
const w3 = [...w8, -8060, 8330, -8328, 8096, -8094];
// m0: shared by macintosh and x-mac-cyrillic
const m0 = [8558, -8328, 8374, -66, -8539, 16, 8043, -8070];
| 50 | + |
// Per-encoding tables for bytes 0x80 and up, keyed by encoding label.
//
// Each array is delta-encoded: the code unit for a byte is the running sum of all entries
// up to and including its position, starting from 0 (so `[128, 1, 1]` means 128, 129, 130).
// A running sum of 0xFFFD marks a byte with no mapping; a following 0 keeps the sum there.
// Arrays may be shorter than 128 entries (e.g. windows-1252 has 32).
// The null prototype keeps inherited Object.prototype keys out of label lookups.
const encodings = {
  '__proto__': null,
  'ibm866': [1040, ...e(47), 8530, 1, 1, -145, 34, 61, 1, -12, -1, 14, -18, 6, 6, -1, -1, -75, 4, 32, -8, -16, -28, 60, 34, 1, -5, -6, 21, -3, -6, -16, 28, -5, 1, -4, 1, -12, -1, -6, 1, 24, -1, -82, -12, 124, -4, 8, 4, -16, -8512, ...e(15), -78, 80, -77, 80, -77, 80, -73, 80, -942, 8553, -8546, 8547, -260, -8306, 9468, -9472],
  'iso-8859-10': [...i0, 100, 14, 16, 8, -2, 14, -143, 148, -43, 80, 6, 23, -208, 189, -32, -154, 85, 14, 16, 8, -2, 14, -128, 133, -43, 80, 6, 23, 7831, -7850, -32, ...i4a, 1, 1, 117, 7, -121, 1, 1, 1, 146, -144, 154, -152, ...e(5), ...i4b, 1, 1, 86, 7, -90, 1, 1, 1, 115, -113, 123, -121, 1, 1, 1, 1, 58],
  'iso-8859-13': [...i0, 8061, -8059, 1, 1, 8058, -8056, 1, 49, -47, 173, -171, 1, 1, 1, 24, -22, 1, 1, 1, 8041, -8039, ...p3, 7835],
  'iso-8859-14': [...i0, 7522, 1, -7520, 103, 1, 7423, -7523, 7641, -7639, 7641, -119, 231, -7749, 1, 202, 7334, 1, -7423, 1, 7455, 1, -7563, 7584, 43, -42, 44, -35, 147, -111, 1, -36, -7585, ...e(15), 165, -163, ...e(5), 7572, -7570, ...e(5), 153, -151, ...e(16), 134, -132, ...e(5), 7541, -7539, ...e(5), 122],
  'iso-8859-15': [...i0, 1, 1, 1, 8201, -8199, 187, -185, 186, -184, ...e(10), 202, -200, 1, 1, 199, -197, 1, 1, 151, 1, 37],
  'iso-8859-16': [...i0, 100, 1, 60, 8043, -142, -7870, -185, 186, -184, 367, -365, 206, -204, 205, 1, -203, 1, 91, 54, 59, 7840, -8039, 1, 199, -113, 268, -350, 151, 1, 37, 4, -188, 1, 1, 64, -62, 66, -64, ...e(9), 65, 51, -113, 1, 1, 124, -122, 132, 22, -151, 1, 1, 1, 60, 258, -315, 1, 1, 1, 33, -31, 35, -33, ...e(9), 34, 51, -82, 1, 1, 93, -91, 101, 22, -120, 1, 1, 1, 29, 258],
  'iso-8859-2': [...i0, 100, 468, -407, -157, 153, 29, -179, 1, 184, -2, 6, 21, -204, 208, -2, -203, 85, 470, -409, -142, 138, 29, 364, -527, 169, -2, 6, 21, 355, -351, -2, ...i2],
  'iso-8859-3': [...i0, 134, 434, -565, 1, 65369, -65241, -125, 1, 136, 46, -64, 22, -135, 65360, -65154, -203, 119, -117, 1, 1, 1, 112, -110, 1, 121, 46, -64, 22, -120, 65344, -65153, -188, 1, 1, 65339, -65337, 70, -2, -65, ...e(8), 65326, -65324, 1, 1, 1, 76, -74, 1, 69, -67, 1, 1, 1, 144, -16, -125, 1, 1, 1, 65307, -65305, 39, -2, -34, ...e(8), 65294, -65292, 1, 1, 1, 45, -43, 1, 38, -36, 1, 1, 1, 113, -16, 380],
  'iso-8859-4': [...i0, 100, 52, 30, -178, 132, 19, -148, 1, 184, -78, 16, 68, -185, 208, -206, 1, 85, 470, -388, -163, 117, 19, 395, -527, 169, -78, 16, 68, -29, 52, -51, ...i4a, 92, -26, 53, 7, -22, -98, 1, 1, 1, 1, 154, -152, 1, 1, 140, 2, -139, ...i4b, 61, -26, 53, 7, -22, -67, 1, 1, 1, 1, 123, -121, 1, 1, 109, 2, 366],
  'iso-8859-5': [...i0, 865, ...e(11), -863, 865, ...e(65), 7367, -7365, ...e(11), -949, 951, 1],
  'iso-8859-6': [...i0, 65373, 0, 0, -65369, 65369, ...z(6), -63985, -1375, 65360, ...z(12), -63970, 63970, 0, 0, -63966, 63966, -63964, ...e(25), 63939, 0, 0, 0, 0, -63933, ...e(18), 63915, ...z(12)],
  'iso-8859-7': [...i0, 8056, 1, -8054, 8201, 3, -8201, 1, 1, 1, 721, -719, 1, 1, 65360, -57320, -8037, 1, 1, 1, 721, 1, 1, -719, ...i7],
  'iso-8859-8': [...i0, 65373, -65371, ...e(7), 46, -44, ...e(14), 62, -60, 1, 1, 1, 65343, ...z(31), -57318, -6727, ...i8],
  'koi8-r': [...k8a, 8450, ...e(14), -8544, 8545, ...e(10), -9411, 933, ...k8b, -28, ...k8b],
  'koi8-u': [...k8a, 3, 8448, -8446, 1, 8448, 1, 1, 1, 1, -8394, -51, 8448, 1, 1, 1, -8544, 3, 8543, -8541, 1, 8543, 1, 1, 1, 1, -8410, -130, -869, 933, ...k8b, -28, ...k8b],
  'macintosh': [196, 1, 2, 2, 8, 5, 6, 5, -1, 2, 2, -1, 2, 2, 2, -1, 2, 1, 2, -1, 2, 1, 2, 2, -1, 2, 2, -1, 5, -1, 2, 1, 7972, -8048, -14, 1, 4, 8059, -8044, 41, -49, -5, 8313, -8302, -12, 8632, -8602, 18, 8518, -8557, 8627, 1, -8640, 16, 8525, 15, -2, -7759, 7787, -8577, 16, 751, -707, 18, -57, -30, 11, ...m0, 32, 3, 18, 125, 1, 7872, 1, 8, 1, -5, 1, -7970, 9427, -9419, 121, 7884, 104, -115, 1, 56007, 1, -56033, -8042, 8035, 4, 18, -8046, 8, -9, 10, -3, 5, 1, 1, -3, 7, 1, 63531, -63533, 8, 1, -2, 88, 405, 22, -557, 553, 1, 1, -546, 549, -2, -20],
  'windows-1250': [...w0, -7888, 7897, -7903, 10, 25, -4, -233, ...w8, -8060, 8330, -8129, 7897, -7903, 10, 25, -4, -218, 551, 17, -407, -157, 96, -94, 1, 1, 1, 181, -179, 1, 1, 1, 205, -203, 1, 554, -409, -142, 1, 1, 1, 1, 77, 90, -164, 130, 416, -415, 62, ...i2],
  'windows-1251': [1026, 1, 7191, -7111, 7115, 8, -6, 1, 139, -124, -7207, 7216, -7215, 2, -1, 4, 67, 7110, 1, 3, 1, 5, -15, 1, -8060, 8330, -7369, 7137, -7136, 2, -1, 4, -959, 878, 80, -86, -868, 1004, -1002, 1, 858, -856, 859, -857, 1, 1, 1, 857, -855, 1, 853, 80, 59, -988, 1, 1, 922, 7365, -7362, -921, 925, -83, 80, 2, -71, ...e(63)],
  'windows-1252': [...p1, -7515, 7530, -7888, 7897, -7911, -197, 240, -238, 1, ...w1, 225, -6],
  'windows-1253': [...p1, -8089, 8104, -8102, 8111, -8109, 1, 1, 1, 1, ...w3, 1, 1, 1, 1, 741, 1, -739, 1, 1, 1, 1, 1, 1, 65364, -65362, 1, 1, 1, 8039, -8037, 1, 1, 1, 721, -719, 1, 1, ...i7],
  'windows-1254': [...p1, -7515, 7530, -7888, 7897, -7911, -197, 1, 1, 1, ...w1, 1, 218, -216, ...e(47), 79, -77, ...e(11), 84, 46, -127, ...e(16), 48, -46, ...e(11), 53, 46],
  'windows-1255': [...p1, -7515, 7530, -8102, 8111, -8109, 1, 1, 1, 1, ...w8, -7480, 7750, -8328, 8096, -8094, ...e(7), 8199, -8197, 1, 1, 1, 1, 46, -44, ...e(14), 62, -60, 1, 1, 1, 1, 1265, ...e(19), 45, 1, 1, 1, 1, 64009, ...z(6), -64045, ...i8],
  'windows-1256': [8364, -6702, 6556, -7816, 7820, 8, -6, 1, -7515, 7530, -6583, 6592, -7911, 1332, 18, -16, 39, 6505, 1, 3, 1, 5, -15, 1, -6507, 6777, -6801, 6569, -7911, 7865, 1, -6483, -1562, 1388, -1386, ...e(7), 1557, -1555, ...e(14), 1378, -1376, 1, 1, 1, 1377, 162, -160, ...e(21), -1375, 1376, 1, 1, 1, 6, 1, 1, 1, -1379, 1380, -1378, 1379, 1, 1, 1, -1377, 1, 1, 1, 1, 1374, 1, -1372, 1, 1372, 1, 1, 1, -1370, 1371, 1, -1369, 1370, -1368, 1369, -1367, 1, 7954, 1, -6461],
  'windows-1257': [...w0, -8102, 8111, -8109, 28, 543, -527, -40, ...w3, 19, 556, -572, 1, 65373, -65371, 1, 1, 65369, -65367, 1, 49, -47, 173, -171, 1, 1, 1, 24, -22, ...e(5), ...p3, 347],
  'windows-1258': [...p1, -7515, 7530, -8102, 8111, -7911, -197, 1, 1, 1, ...w8, -7480, 7750, -8328, 8096, -7911, -182, 1, 218, -216, ...e(34), 64, -62, ...e(7), 565, -563, 1, 1, 65, -63, 568, -566, 1, 204, -202, 1, 1, 1, 1, 1, 1, 211, 340, -548, 1, 1, 1, 33, -31, ...e(7), 534, -532, 1, 1, 34, -32, 562, -560, 1, 173, -171, 1, 1, 1, 1, 1, 1, 180, 7931],
  'windows-874': [8364, -8235, 1, 1, 1, 8098, -8096, ...e(10), ...w8, -8060, ...e(8), 3425, ...e(57), 61891, 0, 0, 0, -61886, ...e(28), 61858, 0, 0, 0],
  'x-mac-cyrillic': [1040, ...e(31), 7153, -8048, 992, -1005, 4, 8059, -8044, 848, -856, -5, 8313, -7456, 80, 7694, -7773, 80, 7627, -8557, 8627, 1, -7695, -929, 988, -137, -4, 80, -77, 80, -78, 80, -79, 80, -2, -83, -857, ...m0, 875, 80, -79, 80, -7, 7102, 1, 8, 1, -5, 1, -7970, 7975, -7184, 80, -79, 80, 7351, -7445, 80, -2, -31, ...e(30), 7262],
};
| 81 | + |
| 82 | +/* eslint-enable @stylistic/js/max-len */ |
| 83 | + |
| 84 | +/* fallback/single-byte.js + /single-byte.node.js, simplified */ |
| 85 | + |
/**
 * Builds the byte -> UTF-16 code unit lookup table for a single-byte encoding.
 *
 * Bytes 0x00-0x7F always map to themselves. For bytes 0x80 and up, the `encodings`
 * table stores deltas (each entry is the difference from the previous code unit,
 * counting from 0), so the entries are summed up here rather than copied verbatim.
 * Bytes beyond the end of a short entry list keep their identity mapping.
 * @param {string} encoding Encoding label: a key of `encodings`, or 'x-user-defined'.
 * @returns {Uint16Array} 256-entry table indexed by byte value.
 * @throws {ERR_ENCODING_NOT_SUPPORTED} If `encoding` is neither of the above.
 */
function getEncoding(encoding) {
  if (encoding === 'x-user-defined') {
    // Upper half maps linearly to U+F780..U+F7FF
    return TypedArrayFrom(Uint16Array, { length: 256 }, (_, i) => (i >= 0x80 ? 0xf700 + i : i));
  }

  if (!ObjectPrototypeHasOwnProperty(encodings, encoding)) {
    throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
  }

  const map = TypedArrayFrom(Uint16Array, { length: 256 }, (_, i) => i); // Unicode subset

  // Undo the delta encoding: a running sum turns the stored differences back into code units
  const deltas = encodings[encoding];
  let codeUnit = 0;
  for (let i = 0; i < deltas.length; i++) {
    codeUnit += deltas[i];
    map[0x80 + i] = codeUnit;
  }

  return map;
}
| 99 | + |
// Every label this module can decode: the table keys, plus two labels that are
// resolved in code rather than through a table entry.
const supported = new SafeSet(ObjectKeys(encodings));
supported.add('iso-8859-8-i');
supported.add('x-user-defined');

// Reports whether `enc` is one of the labels above.
const isSinglebyteEncoding = (enc) => {
  return supported.has(enc);
};

// Decoder functions already built by createSinglebyteDecoder
const decoders = new SafeMap();
| 104 | + |
/**
 * Returns a function that decodes a Buffer in the given single-byte encoding to a string.
 *
 * Decoders are cached. The returned closure captures both `fatal` and the label reported
 * in errors, so both are part of the cache key: keying by encoding alone would hand a
 * non-fatal decoder to a later fatal caller (and vice versa).
 * @param {string} encoding Encoding label; 'iso-8859-8-i' shares the 'iso-8859-8' table.
 * @param {boolean} fatal When truthy, the decoder throws instead of returning U+FFFD
 *   for bytes that have no mapping.
 * @returns {(buf: Buffer) => string} Decoder; throws ERR_ENCODING_INVALID_ENCODED_DATA
 *   in fatal mode when the input contains an unmapped byte.
 * @throws {ERR_ENCODING_NOT_SUPPORTED} From getEncoding, if the label is unknown.
 */
function createSinglebyteDecoder(encoding, fatal) {
  const key = `${fatal ? 'fatal' : 'replacement'}:${encoding}`;
  const cached = decoders.get(key);
  if (cached !== undefined) return cached;

  const id = encoding === 'iso-8859-8-i' ? 'iso-8859-8' : encoding;
  const map = getEncoding(id);
  // Only encodings with unmapped bytes can ever produce U+FFFD
  const incomplete = map.includes(0xfffd);

  // Expects a type-checked Buffer input
  const decoder = (buf) => {
    if (buf.byteLength === 0) return '';
    if (isAscii(buf)) return buf.latin1Slice(); // .latin1Slice is faster than .asciiSlice
    const o = TypedArrayFrom(Uint16Array, buf); // Copy to modify in-place, also those are 16-bit now

    // Map 8 units per iteration, then finish the remainder one by one
    let i = 0;
    for (const end7 = o.length - 7; i < end7; i += 8) {
      o[i] = map[o[i]];
      o[i + 1] = map[o[i + 1]];
      o[i + 2] = map[o[i + 2]];
      o[i + 3] = map[o[i + 3]];
      o[i + 4] = map[o[i + 4]];
      o[i + 5] = map[o[i + 5]];
      o[i + 6] = map[o[i + 6]];
      o[i + 7] = map[o[i + 7]];
    }

    for (const end = o.length; i < end; i++) o[i] = map[o[i]];

    // Reinterpret the mapped code units as UTF-16LE bytes; swap on big-endian hosts
    const b = Buffer.from(o.buffer, o.byteOffset, o.byteLength);
    if (isBigEndian) b.swap16();
    const string = b.ucs2Slice(0, b.byteLength);
    if (fatal && incomplete && StringPrototypeIncludes(string, '\uFFFD')) {
      // The error formats its own message from (encoding, ret); there is no native status code here
      throw new ERR_ENCODING_INVALID_ENCODED_DATA(encoding, undefined);
    }
    return string;
  };

  decoders.set(key, decoder);
  return decoder;
}
| 145 | + |
// Public surface of this module; getEncoding is exposed for tests only.
module.exports = { isSinglebyteEncoding, createSinglebyteDecoder, getEncoding };
0 commit comments