From efa5be8fcb51f3d050df134ef02bae76e383fea0 Mon Sep 17 00:00:00 2001 From: Nikita Skovoroda Date: Wed, 17 Dec 2025 15:25:40 +0400 Subject: [PATCH 1/2] lib: implement all 1-byte encodings in js --- lib/internal/encoding.js | 74 +++++---- lib/internal/encoding/single-byte.js | 155 ++++++++++++++++++ src/encoding_binding.cc | 74 --------- src/encoding_binding.h | 2 - test/parallel/test-bootstrap-modules.js | 1 + .../test-internal-encoding-binding.js | 53 ------ typings/internalBinding/encoding_binding.d.ts | 1 - 7 files changed, 196 insertions(+), 164 deletions(-) create mode 100644 lib/internal/encoding/single-byte.js delete mode 100644 test/parallel/test-internal-encoding-binding.js diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js index 61f48f3395fba7..d30e68df4304a2 100644 --- a/lib/internal/encoding.js +++ b/lib/internal/encoding.js @@ -13,24 +13,27 @@ const { StringPrototypeSlice, Symbol, SymbolToStringTag, - Uint8Array, } = primordials; +const { FastBuffer } = require('internal/buffer'); + const { ERR_ENCODING_NOT_SUPPORTED, ERR_INVALID_ARG_TYPE, ERR_INVALID_THIS, ERR_NO_ICU, } = require('internal/errors').codes; +const kMethod = Symbol('method'); const kHandle = Symbol('handle'); const kFlags = Symbol('flags'); const kEncoding = Symbol('encoding'); const kDecoder = Symbol('decoder'); const kFatal = Symbol('kFatal'); const kUTF8FastPath = Symbol('kUTF8FastPath'); -const kWindows1252FastPath = Symbol('kWindows1252FastPath'); const kIgnoreBOM = Symbol('kIgnoreBOM'); +const { isSinglebyteEncoding, createSinglebyteDecoder } = require('internal/encoding/single-byte'); + const { getConstructorOf, customInspectSymbol: inspect, @@ -55,11 +58,8 @@ const { encodeIntoResults, encodeUtf8String, decodeUTF8, - decodeWindows1252, } = binding; -const { Buffer } = require('buffer'); - function validateDecoder(obj) { if (obj == null || obj[kDecoder] !== true) throw new ERR_INVALID_THIS('TextDecoder'); @@ -69,7 +69,7 @@ const CONVERTER_FLAGS_FLUSH = 0x1; const 
CONVERTER_FLAGS_FATAL = 0x2; const CONVERTER_FLAGS_IGNORE_BOM = 0x4; -const empty = new Uint8Array(0); +const empty = new FastBuffer(); const encodings = new SafeMap([ ['unicode-1-1-utf-8', 'utf-8'], @@ -387,6 +387,24 @@ ObjectDefineProperties( [SymbolToStringTag]: { __proto__: null, configurable: true, value: 'TextEncoder' }, }); +function parseInput(input) { + if (isAnyArrayBuffer(input)) { + try { + return new FastBuffer(input); + } catch { + return empty; + } + } else if (isArrayBufferView(input)) { + try { + return new FastBuffer(input.buffer, input.byteOffset, input.byteLength); + } catch { + return empty; + } + } else { + throw new ERR_INVALID_ARG_TYPE('input', ['ArrayBuffer', 'ArrayBufferView'], input); + } +} + const TextDecoder = internalBinding('config').hasIntl ? makeTextDecoderICU() : @@ -420,10 +438,12 @@ function makeTextDecoderICU() { this[kFatal] = Boolean(options?.fatal); // Only support fast path for UTF-8. this[kUTF8FastPath] = enc === 'utf-8'; - this[kWindows1252FastPath] = enc === 'windows-1252'; this[kHandle] = undefined; + this[kMethod] = undefined; - if (!this[kUTF8FastPath] && !this[kWindows1252FastPath]) { + if (isSinglebyteEncoding(this.encoding)) { + this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]); + } else if (!this[kUTF8FastPath]) { this.#prepareConverter(); } } @@ -438,22 +458,18 @@ function makeTextDecoderICU() { decode(input = empty, options = kEmptyObject) { validateDecoder(this); + validateObject(options, 'options', kValidateObjectAllowObjectsAndNull); + + if (this[kMethod]) return this[kMethod](parseInput(input)); this[kUTF8FastPath] &&= !(options?.stream); - this[kWindows1252FastPath] &&= !(options?.stream); if (this[kUTF8FastPath]) { return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]); } - if (this[kWindows1252FastPath]) { - return decodeWindows1252(input, this[kIgnoreBOM], this[kFatal]); - } - this.#prepareConverter(); - validateObject(options, 'options', kValidateObjectAllowObjectsAndNull); - let 
flags = 0; if (options !== null) flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH; @@ -476,7 +492,7 @@ function makeTextDecoderJS() { const kBOMSeen = Symbol('BOM seen'); function hasConverter(encoding) { - return encoding === 'utf-8' || encoding === 'utf-16le'; + return encoding === 'utf-8' || encoding === 'utf-16le' || isSinglebyteEncoding(encoding); } class TextDecoder { @@ -502,30 +518,20 @@ function makeTextDecoderJS() { this[kFlags] = flags; this[kEncoding] = enc; this[kBOMSeen] = false; + this[kMethod] = undefined; + + if (isSinglebyteEncoding(this.encoding)) { + this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]); + } } decode(input = empty, options = kEmptyObject) { validateDecoder(this); - if (isAnyArrayBuffer(input)) { - try { - input = Buffer.from(input); - } catch { - input = empty; - } - } else if (isArrayBufferView(input)) { - try { - input = Buffer.from(input.buffer, input.byteOffset, - input.byteLength); - } catch { - input = empty; - } - } else { - throw new ERR_INVALID_ARG_TYPE('input', - ['ArrayBuffer', 'ArrayBufferView'], - input); - } + input = parseInput(input); validateObject(options, 'options', kValidateObjectAllowObjectsAndNull); + if (this[kMethod]) return this[kMethod](input); + if (this[kFlags] & CONVERTER_FLAGS_FLUSH) { this[kBOMSeen] = false; } diff --git a/lib/internal/encoding/single-byte.js b/lib/internal/encoding/single-byte.js new file mode 100644 index 00000000000000..df8042e89ad969 --- /dev/null +++ b/lib/internal/encoding/single-byte.js @@ -0,0 +1,155 @@ +// Simplified version extracted from https://npmjs.com/package/@exodus/bytes codepath for 1-byte encodings +// Copyright Exodus Movement. Licensed under MIT License. 
+ +'use strict'; + +const { + Array, + ArrayPrototypeFill, + ObjectKeys, + ObjectPrototypeHasOwnProperty, + SafeArrayIterator, + SafeMap, + SafeSet, + StringPrototypeIncludes, + TypedArrayFrom, + TypedArrayOf, + TypedArrayPrototypeIncludes, + TypedArrayPrototypeSet, + Uint16Array, +} = primordials; + +const { isAscii } = require('buffer'); + +const { FastBuffer } = require('internal/buffer'); + +const { + ERR_ENCODING_NOT_SUPPORTED, + ERR_ENCODING_INVALID_ENCODED_DATA, +} = require('internal/errors').codes; + +const isBigEndian = new FastBuffer(TypedArrayOf(Uint16Array, 258).buffer)[1] === 2; + +const it = (x) => new SafeArrayIterator(x); + +/* fallback/single-byte.encodings.js */ + +const r = 0xfffd; +const e = (x) => it(ArrayPrototypeFill(new Array(x), 1)); +const h = (x) => it(ArrayPrototypeFill(new Array(x), r)); + +/* eslint-disable @stylistic/js/max-len */ + +// Index tables from https://encoding.spec.whatwg.org/#legacy-single-byte-encodings +// Each table in the spec lists only mapping from byte 0x80 onwards, as below that they are all ASCII and mapped as idenity +// Here, 0xfffd (replacement charcode) designates a hole (unmapped offset), as not all encodings map all offsets +// All other numbers are deltas from the last seen mapped value, starting with 0x7f (127, highest ASCII) +// Thus, [0x80, 0x81, , 0x83] is stored as [1, 1, r, 2] +// Truncation (length < 128) means that all remaining ones are mapped as identity (offset i => codepoint i), not unmapped +const encodings = { + '__proto__': null, + 'ibm866': [913, ...e(47), 8530, 1, 1, -145, 34, 61, 1, -12, -1, 14, -18, 6, 6, -1, -1, -75, 4, 32, -8, -16, -28, 60, 34, 1, -5, -6, 21, -3, -6, -16, 28, -5, 1, -4, 1, -12, -1, -6, 1, 24, -1, -82, -12, 124, -4, 8, 4, -16, -8512, ...e(15), -78, 80, -77, 80, -77, 80, -73, 80, -942, 8553, -8546, 8547, -260, -8306, 9468, -9472], + 'iso-8859-10': [...e(33), 100, 14, 16, 8, -2, 14, -143, 148, -43, 80, 6, 23, -208, 189, -32, -154, 85, 14, 16, 8, -2, 14, -128, 133, -43, 
80, 6, 23, 7831, -7850, -32, -75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1, 1, 1, 117, 7, -121, 1, 1, 1, 146, -144, 154, -152, ...e(5), 34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1, 1, 1, 86, 7, -90, 1, 1, 1, 115, -113, 123, -121, 1, 1, 1, 1, 58], + 'iso-8859-13': [...e(33), 8061, -8059, 1, 1, 8058, -8056, 1, 49, -47, 173, -171, 1, 1, 1, 24, -22, 1, 1, 1, 8041, -8039, 1, 1, 65, -63, 158, -156, 1, 1, 1, 40, 30, 42, -46, 6, -66, 1, 83, -6, -6, -67, 176, -99, 12, 20, -12, 17, 37, -29, 2, -114, 121, -119, 1, 1, 155, -49, 25, 16, -142, 159, 2, -158, 38, 42, -46, 6, -35, 1, 52, -6, -6, -36, 145, -99, 12, 20, -12, 17, 37, -29, 2, -83, 90, -88, 1, 1, 124, -49, 25, 16, -111, 128, 2, 7835], + 'iso-8859-14': [...e(33), 7522, 1, -7520, 103, 1, 7423, -7523, 7641, -7639, 7641, -119, 231, -7749, 1, 202, 7334, 1, -7423, 1, 7455, 1, -7563, 7584, 43, -42, 44, -35, 147, -111, 1, -36, -7585, ...e(15), 165, -163, ...e(5), 7572, -7570, ...e(5), 153, -151, ...e(16), 134, -132, ...e(5), 7541, -7539, ...e(5), 122], + 'iso-8859-15': [...e(33), 1, 1, 1, 8201, -8199, 187, -185, 186, -184, ...e(10), 202, -200, 1, 1, 199, -197, 1, 1, 151, 1, 37], + 'iso-8859-16': [...e(33), 100, 1, 60, 8043, -142, -7870, -185, 186, -184, 367, -365, 206, -204, 205, 1, -203, 1, 91, 54, 59, 7840, -8039, 1, 199, -113, 268, -350, 151, 1, 37, 4, -188, 1, 1, 64, -62, 66, -64, ...e(9), 65, 51, -113, 1, 1, 124, -122, 132, 22, -151, 1, 1, 1, 60, 258, -315, 1, 1, 1, 33, -31, 35, -33, ...e(9), 34, 51, -82, 1, 1, 93, -91, 101, 22, -120, 1, 1, 1, 29, 258], + 'iso-8859-2': [...e(33), 100, 468, -407, -157, 153, 29, -179, 1, 184, -2, 6, 21, -204, 208, -2, -203, 85, 470, -409, -142, 138, 29, 364, -527, 169, -2, 6, 21, 355, -351, -2, -40, -147, 1, 64, -62, 117, -51, -63, 69, -67, 79, -77, 79, -77, 1, 64, 2, 51, 4, -116, 1, 124, -122, 1, 129, 22, -148, 150, -148, 1, 133, -131, 118, -116, 1, 33, -31, 86, -51, -32, 38, -36, 48, -46, 48, -46, 1, 33, 2, 51, 4, -85, 1, 93, -91, 1, 98, 22, -117, 119, -117, 1, 102, 374], + 
'iso-8859-3': [...e(33), 134, 434, -565, 1, r, 128, -125, 1, 136, 46, -64, 22, -135, r, 206, -203, 119, -117, 1, 1, 1, 112, -110, 1, 121, 46, -64, 22, -120, r, 191, -188, 1, 1, r, 2, 70, -2, -65, ...e(8), r, 2, 1, 1, 1, 76, -74, 1, 69, -67, 1, 1, 1, 144, -16, -125, 1, 1, 1, r, 2, 39, -2, -34, ...e(8), r, 2, 1, 1, 1, 45, -43, 1, 38, -36, 1, 1, 1, 113, -16, 380], + 'iso-8859-4': [...e(33), 100, 52, 30, -178, 132, 19, -148, 1, 184, -78, 16, 68, -185, 208, -206, 1, 85, 470, -388, -163, 117, 19, 395, -527, 169, -78, 16, 68, -29, 52, -51, -75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1, 92, -26, 53, 7, -22, -98, 1, 1, 1, 1, 154, -152, 1, 1, 140, 2, -139, 34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1, 61, -26, 53, 7, -22, -67, 1, 1, 1, 1, 123, -121, 1, 1, 109, 2, 366], + 'iso-8859-5': [...e(33), 865, ...e(11), -863, 865, ...e(65), 7367, -7365, ...e(11), -949, 951, 1], + 'iso-8859-6': [...e(33), r, r, r, 4, ...h(7), 1384, -1375, ...h(13), 1390, r, r, r, 4, r, 2, ...e(25), r, r, r, r, r, 6, ...e(18), ...h(13)], + 'iso-8859-7': [...e(33), 8056, 1, -8054, 8201, 3, -8201, 1, 1, 1, 721, -719, 1, 1, r, 8040, -8037, 1, 1, 1, 721, 1, 1, -719, 721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), r], + 'iso-8859-8': [...e(33), r, 2, ...e(7), 46, -44, ...e(14), 62, -60, 1, 1, 1, ...h(32), 8025, -6727, ...e(26), r, r, 6692, 1, r], + 'koi8-r': [9345, 2, 10, 4, 4, 4, 4, 8, 8, 8, 8, 68, 4, 4, 4, 4, 1, 1, 1, -627, 640, -903, 1, 46, 28, 1, -8645, 8833, -8817, 2, 5, 64, 9305, 1, 1, -8449, 8450, ...e(14), -8544, 8545, ...e(10), -9411, 933, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3, -28, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3], + 'koi8-u': [9345, 2, 10, 4, 4, 4, 4, 8, 8, 8, 8, 68, 4, 4, 4, 4, 1, 1, 1, -627, 640, -903, 1, 46, 28, 1, -8645, 8833, -8817, 2, 5, 64, 9305, 1, 1, -8449, 3, 8448, -8446, 1, 8448, 1, 1, 1, 1, -8394, -51, 8448, 1, 1, 1, -8544, 
3, 8543, -8541, 1, 8543, 1, 1, 1, 1, -8410, -130, -869, 933, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3, -28, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3], + 'macintosh': [69, 1, 2, 2, 8, 5, 6, 5, -1, 2, 2, -1, 2, 2, 2, -1, 2, 1, 2, -1, 2, 1, 2, 2, -1, 2, 2, -1, 5, -1, 2, 1, 7972, -8048, -14, 1, 4, 8059, -8044, 41, -49, -5, 8313, -8302, -12, 8632, -8602, 18, 8518, -8557, 8627, 1, -8640, 16, 8525, 15, -2, -7759, 7787, -8577, 16, 751, -707, 18, -57, -30, 11, 8558, -8328, 8374, -66, -8539, 16, 8043, -8070, 32, 3, 18, 125, 1, 7872, 1, 8, 1, -5, 1, -7970, 9427, -9419, 121, 7884, 104, -115, 1, 56007, 1, -56033, -8042, 8035, 4, 18, -8046, 8, -9, 10, -3, 5, 1, 1, -3, 7, 1, 63531, -63533, 8, 1, -2, 88, 405, 22, -557, 553, 1, 1, -546, 549, -2, -20], + 'windows-1250': [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104, -7888, 7897, -7903, 10, 25, -4, -233, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8129, 7897, -7903, 10, 25, -4, -218, 551, 17, -407, -157, 96, -94, 1, 1, 1, 181, -179, 1, 1, 1, 205, -203, 1, 554, -409, -142, 1, 1, 1, 1, 77, 90, -164, 130, 416, -415, 62, -40, -147, 1, 64, -62, 117, -51, -63, 69, -67, 79, -77, 79, -77, 1, 64, 2, 51, 4, -116, 1, 124, -122, 1, 129, 22, -148, 150, -148, 1, 133, -131, 118, -116, 1, 33, -31, 86, -51, -32, 38, -36, 48, -46, 48, -46, 1, 33, 2, 51, 4, -85, 1, 93, -91, 1, 98, 22, -117, 119, -117, 1, 102, 374], + 'windows-1251': [899, 1, 7191, -7111, 7115, 8, -6, 1, 139, -124, -7207, 7216, -7215, 2, -1, 4, 67, 7110, 1, 3, 1, 5, -15, 1, -8060, 8330, -7369, 7137, -7136, 2, -1, 4, -959, 878, 80, -86, -868, 1004, -1002, 1, 858, -856, 859, -857, 1, 1, 1, 857, -855, 1, 853, 80, 59, -988, 1, 1, 922, 7365, -7362, -921, 925, -83, 80, 2, -71, ...e(63)], + 'windows-1252': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -7888, 7897, -7911, -197, 240, -238, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8129, 7897, -7911, -182, 
225, -6], + 'windows-1253': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -8089, 8104, -8102, 8111, -8109, 1, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8328, 8096, -8094, 1, 1, 1, 1, 741, 1, -739, 1, 1, 1, 1, 1, 1, r, 2, 1, 1, 1, 8039, -8037, 1, 1, 1, 721, -719, 1, 1, 721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), r], + 'windows-1254': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -7888, 7897, -7911, -197, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8129, 7897, -7911, -182, 1, 218, -216, ...e(47), 79, -77, ...e(11), 84, 46, -127, ...e(16), 48, -46, ...e(11), 53, 46], + 'windows-1255': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -8102, 8111, -8109, 1, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8328, 8096, -8094, ...e(7), 8199, -8197, 1, 1, 1, 1, 46, -44, ...e(14), 62, -60, 1, 1, 1, 1, 1265, ...e(19), 45, 1, 1, 1, 1, ...h(7), -36, ...e(26), r, r, 6692, 1, r], + 'windows-1256': [8237, -6702, 6556, -7816, 7820, 8, -6, 1, -7515, 7530, -6583, 6592, -7911, 1332, 18, -16, 39, 6505, 1, 3, 1, 5, -15, 1, -6507, 6777, -6801, 6569, -7911, 7865, 1, -6483, -1562, 1388, -1386, ...e(7), 1557, -1555, ...e(14), 1378, -1376, 1, 1, 1, 1377, 162, -160, ...e(21), -1375, 1376, 1, 1, 1, 6, 1, 1, 1, -1379, 1380, -1378, 1379, 1, 1, 1, -1377, 1, 1, 1, 1, 1374, 1, -1372, 1, 1372, 1, 1, 1, -1370, 1371, 1, -1369, 1370, -1368, 1369, -1367, 1, 7954, 1, -6461], + 'windows-1257': [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104, -8102, 8111, -8109, 28, 543, -527, -40, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8328, 8096, -8094, 19, 556, -572, 1, r, 2, 1, 1, r, 2, 1, 49, -47, 173, -171, 1, 1, 1, 24, -22, ...e(5), 1, 1, 65, -63, 158, -156, 1, 1, 1, 40, 30, 42, -46, 6, -66, 1, 83, -6, -6, -67, 176, -99, 12, 20, -12, 17, 37, -29, 2, -114, 121, -119, 1, 1, 155, -49, 25, 16, -142, 159, 2, -158, 38, 42, -46, 6, -35, 1, 52, -6, -6, -36, 145, -99, 12, 20, -12, 17, 37, -29, 2, -83, 90, -88, 1, 1, 124, -49, 25, 16, -111, 128, 2, 347], + 'windows-1258': 
[8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -8102, 8111, -7911, -197, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8328, 8096, -7911, -182, 1, 218, -216, ...e(34), 64, -62, ...e(7), 565, -563, 1, 1, 65, -63, 568, -566, 1, 204, -202, 1, 1, 1, 1, 1, 1, 211, 340, -548, 1, 1, 1, 33, -31, ...e(7), 534, -532, 1, 1, 34, -32, 562, -560, 1, 173, -171, 1, 1, 1, 1, 1, 1, 180, 7931], + 'windows-874': [8237, -8235, 1, 1, 1, 8098, -8096, ...e(10), 8072, 1, 3, 1, 5, -15, 1, -8060, ...e(8), 3425, ...e(57), r, r, r, r, 5, ...e(28), r, r, r, r], + 'x-mac-cyrillic': [913, ...e(31), 7153, -8048, 992, -1005, 4, 8059, -8044, 848, -856, -5, 8313, -7456, 80, 7694, -7773, 80, 7627, -8557, 8627, 1, -7695, -929, 988, -137, -4, 80, -77, 80, -78, 80, -79, 80, -2, -83, -857, 8558, -8328, 8374, -66, -8539, 16, 8043, -8070, 875, 80, -79, 80, -7, 7102, 1, 8, 1, -5, 1, -7970, 7975, -7184, 80, -79, 80, 7351, -7445, 80, -2, -31, ...e(30), 7262], +}; + +/* eslint-enable @stylistic/js/max-len */ + +/* fallback/single-byte.js + single-byte.node.js, simplified */ + +const l256 = { __proto__: null, length: 256 }; + +function getEncoding(encoding) { + if (encoding === 'x-user-defined') { + // https://encoding.spec.whatwg.org/#x-user-defined-decoder, 14.5.1. x-user-defined decoder + return TypedArrayFrom(Uint16Array, l256, (_, i) => (i >= 0x80 ? 0xf700 + i : i)); + } + + if (!ObjectPrototypeHasOwnProperty(encodings, encoding)) { + throw new ERR_ENCODING_NOT_SUPPORTED(encoding); + } + + const map = TypedArrayFrom(Uint16Array, l256, (_, i) => i); // Unicode subset + let prev = 127; + map.set(TypedArrayFrom(Uint16Array, it(encodings[encoding]), (x) => (x === r ? 
x : (prev += x))), 128); + return map; +} + +const supported = new SafeSet(it(ObjectKeys(encodings))).add('iso-8859-8-i').add('x-user-defined'); +const isSinglebyteEncoding = (enc) => supported.has(enc); + +const decodersLoose = new SafeMap(); +const decodersFatal = new SafeMap(); + +function createSinglebyteDecoder(encoding, fatal) { + const id = encoding === 'iso-8859-8-i' ? 'iso-8859-8' : encoding; + const decoders = fatal ? decodersFatal : decodersLoose; + const cached = decoders.get(id); + if (cached) return cached; + + const map = getEncoding(id); + const incomplete = TypedArrayPrototypeIncludes(map, r); + + // Expects type-checked Buffer input + const decoder = (buf) => { + if (buf.byteLength === 0) return ''; + if (isAscii(buf)) return buf.latin1Slice(); // .latin1Slice is faster than .asciiSlice + const o = new Uint16Array(buf.length); + TypedArrayPrototypeSet(o, buf); // Copy to modify in-place, also those are 16-bit now + + let i = 0; + for (const end7 = o.length - 7; i < end7; i += 8) { + o[i] = map[o[i]]; + o[i + 1] = map[o[i + 1]]; + o[i + 2] = map[o[i + 2]]; + o[i + 3] = map[o[i + 3]]; + o[i + 4] = map[o[i + 4]]; + o[i + 5] = map[o[i + 5]]; + o[i + 6] = map[o[i + 6]]; + o[i + 7] = map[o[i + 7]]; + } + + for (const end = o.length; i < end; i++) o[i] = map[o[i]]; + + const b = new FastBuffer(o.buffer, o.byteOffset, o.byteLength); + if (isBigEndian) b.swap16(); + const string = b.ucs2Slice(); + if (fatal && incomplete && StringPrototypeIncludes(string, '\uFFFD')) { + throw new ERR_ENCODING_INVALID_ENCODED_DATA(encoding, undefined); + } + return string; + }; + + decoders.set(id, decoder); + return decoder; +} + +module.exports = { + isSinglebyteEncoding, + createSinglebyteDecoder, + getEncoding, // for tests +}; diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc index f68dd9522a0f69..1bf528de5f029f 100644 --- a/src/encoding_binding.cc +++ b/src/encoding_binding.cc @@ -1,7 +1,6 @@ #include "encoding_binding.h" #include "ada.h" #include 
"env-inl.h" -#include "node_buffer.h" #include "node_errors.h" #include "node_external_reference.h" #include "simdutf.h" @@ -414,8 +413,6 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data, SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8); SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII); SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode); - SetMethodNoSideEffect( - isolate, target, "decodeWindows1252", DecodeWindows1252); } void BindingData::CreatePerContextProperties(Local target, @@ -433,77 +430,6 @@ void BindingData::RegisterTimerExternalReferences( registry->Register(DecodeUTF8); registry->Register(ToASCII); registry->Register(ToUnicode); - registry->Register(DecodeWindows1252); -} - -void BindingData::DecodeWindows1252(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - - CHECK_GE(args.Length(), 1); - if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() || - args[0]->IsArrayBufferView())) { - return node::THROW_ERR_INVALID_ARG_TYPE( - env->isolate(), - "The \"input\" argument must be an instance of ArrayBuffer, " - "SharedArrayBuffer, or ArrayBufferView."); - } - - bool ignore_bom = args[1]->IsTrue(); - - ArrayBufferViewContents buffer(args[0]); - const uint8_t* data = buffer.data(); - size_t length = buffer.length(); - - if (ignore_bom && length > 0 && data[0] == 0xFF) { - data++; - length--; - } - - if (length == 0) { - return args.GetReturnValue().SetEmptyString(); - } - - // Windows-1252 specific mapping for bytes 128-159 - // These differ from Latin-1/ISO-8859-1 - static const uint16_t windows1252_mapping[32] = { - 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87 - 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F - 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97 - 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178 // 98-9F - }; - - std::string result; - 
result.reserve(length * 3); // Reserve space for UTF-8 output - - for (size_t i = 0; i < length; i++) { - uint8_t byte = data[i]; - uint32_t codepoint; - - // Check if byte is in the special Windows-1252 range (128-159) - if (byte >= 0x80 && byte <= 0x9F) { - codepoint = windows1252_mapping[byte - 0x80]; - } else { - // For all other bytes, Windows-1252 is identical to Latin-1 - codepoint = byte; - } - - // Convert codepoint to UTF-8 - if (codepoint < 0x80) { - result.push_back(static_cast(codepoint)); - } else if (codepoint < 0x800) { - result.push_back(static_cast(0xC0 | (codepoint >> 6))); - result.push_back(static_cast(0x80 | (codepoint & 0x3F))); - } else { - result.push_back(static_cast(0xE0 | (codepoint >> 12))); - result.push_back(static_cast(0x80 | ((codepoint >> 6) & 0x3F))); - result.push_back(static_cast(0x80 | (codepoint & 0x3F))); - } - } - - Local ret; - if (ToV8Value(env->context(), result, env->isolate()).ToLocal(&ret)) { - args.GetReturnValue().Set(ret); - } } } // namespace encoding_binding diff --git a/src/encoding_binding.h b/src/encoding_binding.h index 8393702cce855f..2690cb74f8a05b 100644 --- a/src/encoding_binding.h +++ b/src/encoding_binding.h @@ -31,8 +31,6 @@ class BindingData : public SnapshotableObject { static void EncodeInto(const v8::FunctionCallbackInfo& args); static void EncodeUtf8String(const v8::FunctionCallbackInfo& args); static void DecodeUTF8(const v8::FunctionCallbackInfo& args); - static void DecodeWindows1252( - const v8::FunctionCallbackInfo& args); static void ToASCII(const v8::FunctionCallbackInfo& args); static void ToUnicode(const v8::FunctionCallbackInfo& args); diff --git a/test/parallel/test-bootstrap-modules.js b/test/parallel/test-bootstrap-modules.js index b050f5bffde04a..d69a299625d9f2 100644 --- a/test/parallel/test-bootstrap-modules.js +++ b/test/parallel/test-bootstrap-modules.js @@ -88,6 +88,7 @@ expected.beforePreExec = new Set([ 'NativeModule internal/process/signal', 'Internal Binding fs', 
'NativeModule internal/encoding', + 'NativeModule internal/encoding/single-byte', 'NativeModule internal/blob', 'NativeModule internal/fs/utils', 'NativeModule fs', diff --git a/test/parallel/test-internal-encoding-binding.js b/test/parallel/test-internal-encoding-binding.js deleted file mode 100644 index 7d5397d213c205..00000000000000 --- a/test/parallel/test-internal-encoding-binding.js +++ /dev/null @@ -1,53 +0,0 @@ -// Flags: --expose-internals - -'use strict'; - -require('../common'); - -const assert = require('node:assert'); -const { internalBinding } = require('internal/test/binding'); -const binding = internalBinding('encoding_binding'); - -// Windows-1252 specific tests -{ - // Test Windows-1252 special characters in 128-159 range - // These differ from Latin-1 - assert.strictEqual(binding.decodeWindows1252(Uint8Array.of(0x80), false, false), '€'); - assert.strictEqual(binding.decodeWindows1252(Uint8Array.of(0x82), false, false), '‚'); - assert.strictEqual(binding.decodeWindows1252(Uint8Array.of(0x83), false, false), 'ƒ'); - assert.strictEqual(binding.decodeWindows1252(Uint8Array.of(0x9F), false, false), 'Ÿ'); -} - -{ - // Test Windows-1252 characters outside 128-159 range (same as Latin-1) - const buf = Uint8Array.from([0xC1, 0xE9, 0xF3]); - assert.strictEqual(binding.decodeWindows1252(buf, false, false), 'Áéó'); -} - -{ - // Empty input - const buf = Uint8Array.from([]); - assert.strictEqual(binding.decodeWindows1252(buf, false, false), ''); -} - -// Windows-1252 specific tests -{ - // Test Windows-1252 special characters in 128-159 range - // These differ from Latin-1 - assert.strictEqual(binding.decodeWindows1252(Uint8Array.of(0x80), false, false), '€'); - assert.strictEqual(binding.decodeWindows1252(Uint8Array.of(0x82), false, false), '‚'); - assert.strictEqual(binding.decodeWindows1252(Uint8Array.of(0x83), false, false), 'ƒ'); - assert.strictEqual(binding.decodeWindows1252(Uint8Array.of(0x9F), false, false), 'Ÿ'); -} - -{ - // Test Windows-1252 
characters outside 128-159 range (same as Latin-1) - const buf = Uint8Array.from([0xC1, 0xE9, 0xF3]); - assert.strictEqual(binding.decodeWindows1252(buf, false, false), 'Áéó'); -} - -{ - // Empty input - const buf = Uint8Array.from([]); - assert.strictEqual(binding.decodeWindows1252(buf, false, false), ''); -} diff --git a/typings/internalBinding/encoding_binding.d.ts b/typings/internalBinding/encoding_binding.d.ts index 6833c9ac0557b1..0774a21f25e21f 100644 --- a/typings/internalBinding/encoding_binding.d.ts +++ b/typings/internalBinding/encoding_binding.d.ts @@ -4,5 +4,4 @@ export interface EncodingBinding { decodeUTF8(buffer: ArrayBufferView | ArrayBuffer | SharedArrayBuffer, ignoreBOM?: boolean, hasFatal?: boolean): string; toASCII(input: string): string; toUnicode(input: string): string; - decodeWindows1252(buffer: ArrayBufferView | ArrayBuffer | SharedArrayBuffer, ignoreBOM?: boolean, hasFatal?: boolean): string; } From 6a46ae3ec9dccf4a8740965ba20f05ce0d857ec2 Mon Sep 17 00:00:00 2001 From: Nikita Skovoroda Date: Fri, 19 Dec 2025 00:57:27 +0400 Subject: [PATCH 2/2] src: move all 1-byte encodings to native --- lib/internal/encoding.js | 48 ++- lib/internal/encoding/single-byte.js | 155 ---------- src/encoding_binding.cc | 59 ++++ src/encoding_binding.h | 1 + src/encoding_singlebyte.h | 380 ++++++++++++++++++++++++ src/string_bytes.cc | 6 + src/string_bytes.h | 4 + test/parallel/test-bootstrap-modules.js | 1 - 8 files changed, 495 insertions(+), 159 deletions(-) delete mode 100644 lib/internal/encoding/single-byte.js create mode 100644 src/encoding_singlebyte.h diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js index d30e68df4304a2..f0329822d53c8a 100644 --- a/lib/internal/encoding.js +++ b/lib/internal/encoding.js @@ -4,11 +4,13 @@ // https://encoding.spec.whatwg.org const { + ArrayPrototypeMap, Boolean, ObjectDefineProperties, ObjectGetOwnPropertyDescriptors, ObjectSetPrototypeOf, ObjectValues, + SafeArrayIterator, SafeMap, StringPrototypeSlice, 
Symbol, @@ -32,8 +34,6 @@ const kFatal = Symbol('kFatal'); const kUTF8FastPath = Symbol('kUTF8FastPath'); const kIgnoreBOM = Symbol('kIgnoreBOM'); -const { isSinglebyteEncoding, createSinglebyteDecoder } = require('internal/encoding/single-byte'); - const { getConstructorOf, customInspectSymbol: inspect, @@ -58,6 +58,7 @@ const { encodeIntoResults, encodeUtf8String, decodeUTF8, + decodeSingleByte, } = binding; function validateDecoder(obj) { @@ -71,6 +72,47 @@ const CONVERTER_FLAGS_IGNORE_BOM = 0x4; const empty = new FastBuffer(); +// Has to be synced with src/ +const encodingsSinglebyte = new SafeMap(new SafeArrayIterator(ArrayPrototypeMap([ + 'ibm866', + 'koi8-r', + 'koi8-u', + 'macintosh', + 'x-mac-cyrillic', + 'iso-8859-2', + 'iso-8859-3', + 'iso-8859-4', + 'iso-8859-5', + 'iso-8859-6', + 'iso-8859-7', + 'iso-8859-8', + 'iso-8859-8-i', + 'iso-8859-10', + 'iso-8859-13', + 'iso-8859-14', + 'iso-8859-15', + 'iso-8859-16', + 'windows-874', + 'windows-1250', + 'windows-1251', + 'windows-1252', + 'windows-1253', + 'windows-1254', + 'windows-1255', + 'windows-1256', + 'windows-1257', + 'windows-1258', + 'x-user-defined', // Has to be last, special case +], (e, i) => [e, i]))); + +const isSinglebyteEncoding = (enc) => encodingsSinglebyte.has(enc); + +function createSinglebyteDecoder(encoding, fatal) { + const key = encodingsSinglebyte.get(encoding); + if (key === undefined) throw new ERR_ENCODING_NOT_SUPPORTED(encoding); + return (buf) => decodeSingleByte(buf, key, fatal); +} + const encodings = new SafeMap([ ['unicode-1-1-utf-8', 'utf-8'], ['unicode11utf8', 'utf-8'], @@ -460,7 +502,7 @@ function makeTextDecoderICU() { validateDecoder(this); validateObject(options, 'options', kValidateObjectAllowObjectsAndNull); - if (this[kMethod]) return this[kMethod](parseInput(input)); + if (this[kMethod]) return this[kMethod](input); this[kUTF8FastPath] &&= !(options?.stream); diff --git a/lib/internal/encoding/single-byte.js b/lib/internal/encoding/single-byte.js deleted file 
mode 100644 index df8042e89ad969..00000000000000 --- a/lib/internal/encoding/single-byte.js +++ /dev/null @@ -1,155 +0,0 @@ -// Simplified version extracted from https://npmjs.com/package/@exodus/bytes codepath for 1-byte encodings -// Copyright Exodus Movement. Licensed under MIT License. - -'use strict'; - -const { - Array, - ArrayPrototypeFill, - ObjectKeys, - ObjectPrototypeHasOwnProperty, - SafeArrayIterator, - SafeMap, - SafeSet, - StringPrototypeIncludes, - TypedArrayFrom, - TypedArrayOf, - TypedArrayPrototypeIncludes, - TypedArrayPrototypeSet, - Uint16Array, -} = primordials; - -const { isAscii } = require('buffer'); - -const { FastBuffer } = require('internal/buffer'); - -const { - ERR_ENCODING_NOT_SUPPORTED, - ERR_ENCODING_INVALID_ENCODED_DATA, -} = require('internal/errors').codes; - -const isBigEndian = new FastBuffer(TypedArrayOf(Uint16Array, 258).buffer)[1] === 2; - -const it = (x) => new SafeArrayIterator(x); - -/* fallback/single-byte.encodings.js */ - -const r = 0xfffd; -const e = (x) => it(ArrayPrototypeFill(new Array(x), 1)); -const h = (x) => it(ArrayPrototypeFill(new Array(x), r)); - -/* eslint-disable @stylistic/js/max-len */ - -// Index tables from https://encoding.spec.whatwg.org/#legacy-single-byte-encodings -// Each table in the spec lists only mapping from byte 0x80 onwards, as below that they are all ASCII and mapped as idenity -// Here, 0xfffd (replacement charcode) designates a hole (unmapped offset), as not all encodings map all offsets -// All other numbers are deltas from the last seen mapped value, starting with 0x7f (127, highest ASCII) -// Thus, [0x80, 0x81, , 0x83] is stored as [1, 1, r, 2] -// Truncation (length < 128) means that all remaining ones are mapped as identity (offset i => codepoint i), not unmapped -const encodings = { - '__proto__': null, - 'ibm866': [913, ...e(47), 8530, 1, 1, -145, 34, 61, 1, -12, -1, 14, -18, 6, 6, -1, -1, -75, 4, 32, -8, -16, -28, 60, 34, 1, -5, -6, 21, -3, -6, -16, 28, -5, 1, -4, 1, -12, -1, 
-6, 1, 24, -1, -82, -12, 124, -4, 8, 4, -16, -8512, ...e(15), -78, 80, -77, 80, -77, 80, -73, 80, -942, 8553, -8546, 8547, -260, -8306, 9468, -9472], - 'iso-8859-10': [...e(33), 100, 14, 16, 8, -2, 14, -143, 148, -43, 80, 6, 23, -208, 189, -32, -154, 85, 14, 16, 8, -2, 14, -128, 133, -43, 80, 6, 23, 7831, -7850, -32, -75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1, 1, 1, 117, 7, -121, 1, 1, 1, 146, -144, 154, -152, ...e(5), 34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1, 1, 1, 86, 7, -90, 1, 1, 1, 115, -113, 123, -121, 1, 1, 1, 1, 58], - 'iso-8859-13': [...e(33), 8061, -8059, 1, 1, 8058, -8056, 1, 49, -47, 173, -171, 1, 1, 1, 24, -22, 1, 1, 1, 8041, -8039, 1, 1, 65, -63, 158, -156, 1, 1, 1, 40, 30, 42, -46, 6, -66, 1, 83, -6, -6, -67, 176, -99, 12, 20, -12, 17, 37, -29, 2, -114, 121, -119, 1, 1, 155, -49, 25, 16, -142, 159, 2, -158, 38, 42, -46, 6, -35, 1, 52, -6, -6, -36, 145, -99, 12, 20, -12, 17, 37, -29, 2, -83, 90, -88, 1, 1, 124, -49, 25, 16, -111, 128, 2, 7835], - 'iso-8859-14': [...e(33), 7522, 1, -7520, 103, 1, 7423, -7523, 7641, -7639, 7641, -119, 231, -7749, 1, 202, 7334, 1, -7423, 1, 7455, 1, -7563, 7584, 43, -42, 44, -35, 147, -111, 1, -36, -7585, ...e(15), 165, -163, ...e(5), 7572, -7570, ...e(5), 153, -151, ...e(16), 134, -132, ...e(5), 7541, -7539, ...e(5), 122], - 'iso-8859-15': [...e(33), 1, 1, 1, 8201, -8199, 187, -185, 186, -184, ...e(10), 202, -200, 1, 1, 199, -197, 1, 1, 151, 1, 37], - 'iso-8859-16': [...e(33), 100, 1, 60, 8043, -142, -7870, -185, 186, -184, 367, -365, 206, -204, 205, 1, -203, 1, 91, 54, 59, 7840, -8039, 1, 199, -113, 268, -350, 151, 1, 37, 4, -188, 1, 1, 64, -62, 66, -64, ...e(9), 65, 51, -113, 1, 1, 124, -122, 132, 22, -151, 1, 1, 1, 60, 258, -315, 1, 1, 1, 33, -31, 35, -33, ...e(9), 34, 51, -82, 1, 1, 93, -91, 101, 22, -120, 1, 1, 1, 29, 258], - 'iso-8859-2': [...e(33), 100, 468, -407, -157, 153, 29, -179, 1, 184, -2, 6, 21, -204, 208, -2, -203, 85, 470, -409, -142, 138, 29, 364, -527, 169, -2, 6, 21, 355, -351, 
-2, -40, -147, 1, 64, -62, 117, -51, -63, 69, -67, 79, -77, 79, -77, 1, 64, 2, 51, 4, -116, 1, 124, -122, 1, 129, 22, -148, 150, -148, 1, 133, -131, 118, -116, 1, 33, -31, 86, -51, -32, 38, -36, 48, -46, 48, -46, 1, 33, 2, 51, 4, -85, 1, 93, -91, 1, 98, 22, -117, 119, -117, 1, 102, 374], - 'iso-8859-3': [...e(33), 134, 434, -565, 1, r, 128, -125, 1, 136, 46, -64, 22, -135, r, 206, -203, 119, -117, 1, 1, 1, 112, -110, 1, 121, 46, -64, 22, -120, r, 191, -188, 1, 1, r, 2, 70, -2, -65, ...e(8), r, 2, 1, 1, 1, 76, -74, 1, 69, -67, 1, 1, 1, 144, -16, -125, 1, 1, 1, r, 2, 39, -2, -34, ...e(8), r, 2, 1, 1, 1, 45, -43, 1, 38, -36, 1, 1, 1, 113, -16, 380], - 'iso-8859-4': [...e(33), 100, 52, 30, -178, 132, 19, -148, 1, 184, -78, 16, 68, -185, 208, -206, 1, 85, 470, -388, -163, 117, 19, 395, -527, 169, -78, 16, 68, -29, 52, -51, -75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1, 92, -26, 53, 7, -22, -98, 1, 1, 1, 1, 154, -152, 1, 1, 140, 2, -139, 34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1, 61, -26, 53, 7, -22, -67, 1, 1, 1, 1, 123, -121, 1, 1, 109, 2, 366], - 'iso-8859-5': [...e(33), 865, ...e(11), -863, 865, ...e(65), 7367, -7365, ...e(11), -949, 951, 1], - 'iso-8859-6': [...e(33), r, r, r, 4, ...h(7), 1384, -1375, ...h(13), 1390, r, r, r, 4, r, 2, ...e(25), r, r, r, r, r, 6, ...e(18), ...h(13)], - 'iso-8859-7': [...e(33), 8056, 1, -8054, 8201, 3, -8201, 1, 1, 1, 721, -719, 1, 1, r, 8040, -8037, 1, 1, 1, 721, 1, 1, -719, 721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), r], - 'iso-8859-8': [...e(33), r, 2, ...e(7), 46, -44, ...e(14), 62, -60, 1, 1, 1, ...h(32), 8025, -6727, ...e(26), r, r, 6692, 1, r], - 'koi8-r': [9345, 2, 10, 4, 4, 4, 4, 8, 8, 8, 8, 68, 4, 4, 4, 4, 1, 1, 1, -627, 640, -903, 1, 46, 28, 1, -8645, 8833, -8817, 2, 5, 64, 9305, 1, 1, -8449, 8450, ...e(14), -8544, 8545, ...e(10), -9411, 933, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3, -28, -30, 1, 21, -18, 1, 15, -17, 18, -13, 
...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3], - 'koi8-u': [9345, 2, 10, 4, 4, 4, 4, 8, 8, 8, 8, 68, 4, 4, 4, 4, 1, 1, 1, -627, 640, -903, 1, 46, 28, 1, -8645, 8833, -8817, 2, 5, 64, 9305, 1, 1, -8449, 3, 8448, -8446, 1, 8448, 1, 1, 1, 1, -8394, -51, 8448, 1, 1, 1, -8544, 3, 8543, -8541, 1, 8543, 1, 1, 1, 1, -8410, -130, -869, 933, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3, -28, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3], - 'macintosh': [69, 1, 2, 2, 8, 5, 6, 5, -1, 2, 2, -1, 2, 2, 2, -1, 2, 1, 2, -1, 2, 1, 2, 2, -1, 2, 2, -1, 5, -1, 2, 1, 7972, -8048, -14, 1, 4, 8059, -8044, 41, -49, -5, 8313, -8302, -12, 8632, -8602, 18, 8518, -8557, 8627, 1, -8640, 16, 8525, 15, -2, -7759, 7787, -8577, 16, 751, -707, 18, -57, -30, 11, 8558, -8328, 8374, -66, -8539, 16, 8043, -8070, 32, 3, 18, 125, 1, 7872, 1, 8, 1, -5, 1, -7970, 9427, -9419, 121, 7884, 104, -115, 1, 56007, 1, -56033, -8042, 8035, 4, 18, -8046, 8, -9, 10, -3, 5, 1, 1, -3, 7, 1, 63531, -63533, 8, 1, -2, 88, 405, 22, -557, 553, 1, 1, -546, 549, -2, -20], - 'windows-1250': [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104, -7888, 7897, -7903, 10, 25, -4, -233, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8129, 7897, -7903, 10, 25, -4, -218, 551, 17, -407, -157, 96, -94, 1, 1, 1, 181, -179, 1, 1, 1, 205, -203, 1, 554, -409, -142, 1, 1, 1, 1, 77, 90, -164, 130, 416, -415, 62, -40, -147, 1, 64, -62, 117, -51, -63, 69, -67, 79, -77, 79, -77, 1, 64, 2, 51, 4, -116, 1, 124, -122, 1, 129, 22, -148, 150, -148, 1, 133, -131, 118, -116, 1, 33, -31, 86, -51, -32, 38, -36, 48, -46, 48, -46, 1, 33, 2, 51, 4, -85, 1, 93, -91, 1, 98, 22, -117, 119, -117, 1, 102, 374], - 'windows-1251': [899, 1, 7191, -7111, 7115, 8, -6, 1, 139, -124, -7207, 7216, -7215, 2, -1, 4, 67, 7110, 1, 3, 1, 5, -15, 1, -8060, 8330, -7369, 7137, -7136, 2, -1, 4, -959, 878, 80, -86, -868, 1004, -1002, 1, 858, -856, 
859, -857, 1, 1, 1, 857, -855, 1, 853, 80, 59, -988, 1, 1, 922, 7365, -7362, -921, 925, -83, 80, 2, -71, ...e(63)], - 'windows-1252': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -7888, 7897, -7911, -197, 240, -238, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8129, 7897, -7911, -182, 225, -6], - 'windows-1253': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -8089, 8104, -8102, 8111, -8109, 1, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8328, 8096, -8094, 1, 1, 1, 1, 741, 1, -739, 1, 1, 1, 1, 1, 1, r, 2, 1, 1, 1, 8039, -8037, 1, 1, 1, 721, -719, 1, 1, 721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), r], - 'windows-1254': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -7888, 7897, -7911, -197, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8129, 7897, -7911, -182, 1, 218, -216, ...e(47), 79, -77, ...e(11), 84, 46, -127, ...e(16), 48, -46, ...e(11), 53, 46], - 'windows-1255': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -8102, 8111, -8109, 1, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8328, 8096, -8094, ...e(7), 8199, -8197, 1, 1, 1, 1, 46, -44, ...e(14), 62, -60, 1, 1, 1, 1, 1265, ...e(19), 45, 1, 1, 1, 1, ...h(7), -36, ...e(26), r, r, 6692, 1, r], - 'windows-1256': [8237, -6702, 6556, -7816, 7820, 8, -6, 1, -7515, 7530, -6583, 6592, -7911, 1332, 18, -16, 39, 6505, 1, 3, 1, 5, -15, 1, -6507, 6777, -6801, 6569, -7911, 7865, 1, -6483, -1562, 1388, -1386, ...e(7), 1557, -1555, ...e(14), 1378, -1376, 1, 1, 1, 1377, 162, -160, ...e(21), -1375, 1376, 1, 1, 1, 6, 1, 1, 1, -1379, 1380, -1378, 1379, 1, 1, 1, -1377, 1, 1, 1, 1, 1374, 1, -1372, 1, 1372, 1, 1, 1, -1370, 1371, 1, -1369, 1370, -1368, 1369, -1367, 1, 7954, 1, -6461], - 'windows-1257': [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104, -8102, 8111, -8109, 28, 543, -527, -40, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8328, 8096, -8094, 19, 556, -572, 1, r, 2, 1, 1, r, 2, 1, 49, -47, 173, -171, 1, 1, 1, 24, -22, ...e(5), 1, 1, 65, -63, 158, -156, 1, 1, 1, 40, 30, 42, 
-46, 6, -66, 1, 83, -6, -6, -67, 176, -99, 12, 20, -12, 17, 37, -29, 2, -114, 121, -119, 1, 1, 155, -49, 25, 16, -142, 159, 2, -158, 38, 42, -46, 6, -35, 1, 52, -6, -6, -36, 145, -99, 12, 20, -12, 17, 37, -29, 2, -83, 90, -88, 1, 1, 124, -49, 25, 16, -111, 128, 2, 347], - 'windows-1258': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -8102, 8111, -7911, -197, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8328, 8096, -7911, -182, 1, 218, -216, ...e(34), 64, -62, ...e(7), 565, -563, 1, 1, 65, -63, 568, -566, 1, 204, -202, 1, 1, 1, 1, 1, 1, 211, 340, -548, 1, 1, 1, 33, -31, ...e(7), 534, -532, 1, 1, 34, -32, 562, -560, 1, 173, -171, 1, 1, 1, 1, 1, 1, 180, 7931], - 'windows-874': [8237, -8235, 1, 1, 1, 8098, -8096, ...e(10), 8072, 1, 3, 1, 5, -15, 1, -8060, ...e(8), 3425, ...e(57), r, r, r, r, 5, ...e(28), r, r, r, r], - 'x-mac-cyrillic': [913, ...e(31), 7153, -8048, 992, -1005, 4, 8059, -8044, 848, -856, -5, 8313, -7456, 80, 7694, -7773, 80, 7627, -8557, 8627, 1, -7695, -929, 988, -137, -4, 80, -77, 80, -78, 80, -79, 80, -2, -83, -857, 8558, -8328, 8374, -66, -8539, 16, 8043, -8070, 875, 80, -79, 80, -7, 7102, 1, 8, 1, -5, 1, -7970, 7975, -7184, 80, -79, 80, 7351, -7445, 80, -2, -31, ...e(30), 7262], -}; - -/* eslint-enable @stylistic/js/max-len */ - -/* fallback/single-byte.js + single-byte.node.js, simplified */ - -const l256 = { __proto__: null, length: 256 }; - -function getEncoding(encoding) { - if (encoding === 'x-user-defined') { - // https://encoding.spec.whatwg.org/#x-user-defined-decoder, 14.5.1. x-user-defined decoder - return TypedArrayFrom(Uint16Array, l256, (_, i) => (i >= 0x80 ? 0xf700 + i : i)); - } - - if (!ObjectPrototypeHasOwnProperty(encodings, encoding)) { - throw new ERR_ENCODING_NOT_SUPPORTED(encoding); - } - - const map = TypedArrayFrom(Uint16Array, l256, (_, i) => i); // Unicode subset - let prev = 127; - map.set(TypedArrayFrom(Uint16Array, it(encodings[encoding]), (x) => (x === r ? 
x : (prev += x))), 128); - return map; -} - -const supported = new SafeSet(it(ObjectKeys(encodings))).add('iso-8859-8-i').add('x-user-defined'); -const isSinglebyteEncoding = (enc) => supported.has(enc); - -const decodersLoose = new SafeMap(); -const decodersFatal = new SafeMap(); - -function createSinglebyteDecoder(encoding, fatal) { - const id = encoding === 'iso-8859-8-i' ? 'iso-8859-8' : encoding; - const decoders = fatal ? decodersFatal : decodersLoose; - const cached = decoders.get(id); - if (cached) return cached; - - const map = getEncoding(id); - const incomplete = TypedArrayPrototypeIncludes(map, r); - - // Expects type-checked Buffer input - const decoder = (buf) => { - if (buf.byteLength === 0) return ''; - if (isAscii(buf)) return buf.latin1Slice(); // .latin1Slice is faster than .asciiSlice - const o = new Uint16Array(buf.length); - TypedArrayPrototypeSet(o, buf); // Copy to modify in-place, also those are 16-bit now - - let i = 0; - for (const end7 = o.length - 7; i < end7; i += 8) { - o[i] = map[o[i]]; - o[i + 1] = map[o[i + 1]]; - o[i + 2] = map[o[i + 2]]; - o[i + 3] = map[o[i + 3]]; - o[i + 4] = map[o[i + 4]]; - o[i + 5] = map[o[i + 5]]; - o[i + 6] = map[o[i + 6]]; - o[i + 7] = map[o[i + 7]]; - } - - for (const end = o.length; i < end; i++) o[i] = map[o[i]]; - - const b = new FastBuffer(o.buffer, o.byteOffset, o.byteLength); - if (isBigEndian) b.swap16(); - const string = b.ucs2Slice(); - if (fatal && incomplete && StringPrototypeIncludes(string, '\uFFFD')) { - throw new ERR_ENCODING_INVALID_ENCODED_DATA(encoding, undefined); - } - return string; - }; - - decoders.set(id, decoder); - return decoder; -} - -module.exports = { - isSinglebyteEncoding, - createSinglebyteDecoder, - getEncoding, // for tests -}; diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc index 1bf528de5f029f..cb902924661af4 100644 --- a/src/encoding_binding.cc +++ b/src/encoding_binding.cc @@ -1,4 +1,5 @@ #include "encoding_binding.h" +#include "encoding_singlebyte.h" 
#include "ada.h"
 #include "env-inl.h"
 #include "node_errors.h"
@@ -379,6 +380,88 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
   }
 }
 
+// Decodes args[0] using one of the legacy single-byte encodings and returns
+// the resulting string.
+//   args[0]: ArrayBuffer, SharedArrayBuffer or ArrayBufferView holding the
+//            encoded bytes.
+//   args[1]: Int32 encoding index in [0, 29). 28 selects x-user-defined; any
+//            other value indexes tSingleByteEncodings / fSingleByteEncodings.
+//   args[2]: fatal flag. When true and fSingleByteEncodings[encoding] is set,
+//            a U+FFFD in the decoded output throws
+//            ERR_ENCODING_INVALID_ENCODED_DATA.
+void BindingData::DecodeSingleByte(const FunctionCallbackInfo<Value>& args) {
+  Environment* env = Environment::GetCurrent(args);
+
+  CHECK_GE(args.Length(), 2);
+
+  if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
+        args[0]->IsArrayBufferView())) {
+    return node::THROW_ERR_INVALID_ARG_TYPE(
+        env->isolate(),
+        "The \"list\" argument must be an instance of SharedArrayBuffer, "
+        "ArrayBuffer or ArrayBufferView.");
+  }
+
+  CHECK(args[1]->IsInt32());
+  const int encoding = args[1].As<v8::Int32>()->Value();
+  CHECK(encoding >= 0 && encoding < 29);
+
+  ArrayBufferViewContents<uint8_t> buffer(args[0]);
+  const uint8_t* data = buffer.data();
+  size_t length = buffer.length();
+
+  if (length == 0) return args.GetReturnValue().SetEmptyString();
+
+  // ASCII-only input is returned through the LATIN1 path; no table lookup.
+  const char* chars = reinterpret_cast<const char*>(data);
+  if (!simdutf::validate_ascii_with_errors(chars, length).error) {
+    Local<Value> ret;
+    if (StringBytes::Encode(env->isolate(), chars, length, LATIN1)
+            .ToLocal(&ret)) {
+      args.GetReturnValue().Set(ret);
+    }
+    return;
+  }
+
+  // UncheckedMalloc() reports failure by returning nullptr, so check the
+  // result before writing through dst.
+  uint16_t* dst = node::UncheckedMalloc<uint16_t>(length);
+  if (dst == nullptr) {
+    return node::THROW_ERR_MEMORY_ALLOCATION_FAILED(env->isolate());
+  }
+
+  if (encoding == 28) {
+    // x-user-defined
+    for (size_t i = 0; i < length; i++) {
+      dst[i] = data[i] >= 0x80 ? data[i] + 0xf700 : data[i];
+    }
+  } else {
+    bool has_fatal = args[2]->IsTrue();
+
+    const uint16_t* table = tSingleByteEncodings[encoding];
+    for (size_t i = 0; i < length; i++) dst[i] = table[data[i]];
+
+    if (has_fatal && fSingleByteEncodings[encoding]) {
+      const char16_t* begin = reinterpret_cast<const char16_t*>(dst);
+      const char16_t* end = begin + length;
+      if (simdutf::find(begin, end, 0xfffd) != end) {
+        // dst has not been handed off yet; release it before throwing.
+        free(dst);
+        return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
+            env->isolate(),
+            "The encoded data was not valid for this encoding");
+      }
+    }
+  }
+
+  // NOTE(review): assumes StringBytes::Raw() takes ownership of dst, also on
+  // failure -- confirm against its definition.
+  Local<Value> ret;
+  if (StringBytes::Raw(env->isolate(), dst, length).ToLocal(&ret)) {
+    args.GetReturnValue().Set(ret);
+  }
+}
+
 void BindingData::ToASCII(const FunctionCallbackInfo<Value>& args) {
   Environment* env = Environment::GetCurrent(args);
   CHECK_GE(args.Length(), 1);
@@ -411,6 +494,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
   SetMethod(isolate, target, "encodeInto", EncodeInto);
   SetMethodNoSideEffect(isolate, target, "encodeUtf8String", EncodeUtf8String);
   SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8);
+  SetMethodNoSideEffect(isolate, target, "decodeSingleByte", DecodeSingleByte);
   SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII);
   SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode);
 }
@@ -428,6 +512,7 @@ void BindingData::RegisterTimerExternalReferences(
   registry->Register(EncodeInto);
   registry->Register(EncodeUtf8String);
   registry->Register(DecodeUTF8);
+  registry->Register(DecodeSingleByte);
   registry->Register(ToASCII);
   registry->Register(ToUnicode);
 }
diff --git a/src/encoding_binding.h b/src/encoding_binding.h
index 2690cb74f8a05b..c1eae9472878d0 100644
--- a/src/encoding_binding.h
+++ b/src/encoding_binding.h
@@ -31,6 +31,7 @@ class BindingData : public SnapshotableObject {
   static void EncodeInto(const v8::FunctionCallbackInfo<v8::Value>& args);
   static void EncodeUtf8String(const v8::FunctionCallbackInfo<v8::Value>& args);
   static void DecodeUTF8(const v8::FunctionCallbackInfo<v8::Value>& args);
+  static void 
DecodeSingleByte(const v8::FunctionCallbackInfo& args); static void ToASCII(const v8::FunctionCallbackInfo& args); static void ToUnicode(const v8::FunctionCallbackInfo& args); diff --git a/src/encoding_singlebyte.h b/src/encoding_singlebyte.h new file mode 100644 index 00000000000000..7c8b1c87ad9e56 --- /dev/null +++ b/src/encoding_singlebyte.h @@ -0,0 +1,380 @@ +// Generated from https://encoding.spec.whatwg.org/#legacy-single-byte-encodings +// Mapping for 0x80-0xFF, 0xFFFD designates unmapped offset (replacement) +// Flag is whether the table contains 0xFFFD or not +namespace { + +#define tROW(x) x, x+1, x+2, x+3, x+4, x+5, x+6, x+7, x+8, x+9, x+10, x+11, x+12, x+13, x+14, x+15 +#define tASCII tROW(0x00), tROW(0x10), tROW(0x20), tROW(0x30), tROW(0x40), tROW(0x50), tROW(0x60), tROW(0x70) + +static constexpr bool fIBM866 = false; +static constexpr uint16_t tIBM866[256] = { + tASCII, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, + 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510, + 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F, 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567, + 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B, 0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580, + 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F, + 0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040E, 0x045E, 0x00B0, 0x2219, 0x00B7, 0x221A, 0x2116, 0x00A4, 0x25A0, 0x00A0 +}; + +static constexpr bool fISO_8859_2 = false; +static 
constexpr uint16_t tISO_8859_2[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7, 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B, + 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7, 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C, + 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E, + 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF, + 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F, + 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9 +}; + +static constexpr bool fISO_8859_3 = true; +static constexpr uint16_t tISO_8859_3[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7, 0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0xFFFD, 0x017B, + 0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7, 0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0xFFFD, 0x017C, + 0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7, 0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7, 0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9 +}; + +static constexpr bool fISO_8859_4 = false; +static constexpr uint16_t tISO_8859_4[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7, 0x00A8, 0x0160, 0x0112, 
0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF, + 0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7, 0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B, + 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A, + 0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF, + 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B, + 0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9 +}; + +static constexpr bool fISO_8859_5 = false; +static constexpr uint16_t tISO_8859_5[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, + 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, + 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F, + 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F +}; + +static constexpr bool fISO_8859_6 = true; +static constexpr uint16_t tISO_8859_6[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x060C, 0x00AD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 0xFFFD, 0xFFFD, 0xFFFD, 0x061F, 
+ 0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F, + 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, 0x0638, 0x0639, 0x063A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, 0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F, + 0x0650, 0x0651, 0x0652, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD +}; + +static constexpr bool fISO_8859_7 = true; +static constexpr uint16_t tISO_8859_7[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0x2018, 0x2019, 0x00A3, 0x20AC, 0x20AF, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x037A, 0x00AB, 0x00AC, 0x00AD, 0xFFFD, 0x2015, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7, 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F, + 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F, + 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF, + 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF, + 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD +}; + +static constexpr bool fISO_8859_8 = true; +static constexpr uint16_t tISO_8859_8[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017, + 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, + 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD +}; + +static constexpr bool fISO_8859_10 = false; +static constexpr uint16_t tISO_8859_10[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7, 0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A, + 0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7, 0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B, + 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF, + 0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168, 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, + 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF, + 0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169, 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138 +}; + +static constexpr bool fISO_8859_13 = false; +static constexpr uint16_t tISO_8859_13[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7, 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7, 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6, + 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112, 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B, + 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7, 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF, + 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113, 0x010D, 
0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C, + 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019 +}; + +static constexpr bool fISO_8859_14 = false; +static constexpr uint16_t tISO_8859_14[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7, 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178, + 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56, 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61, + tROW(0xC0), + 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF, + tROW(0xE0), + 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF +}; + +static constexpr bool fISO_8859_15 = false; +static constexpr uint16_t tISO_8859_15[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7, 0x0161, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6, 0x00B7, 0x017E, 0x00B9, 0x00BA, 0x00BB, 0x0152, 0x0153, 0x0178, 0x00BF, + tROW(0xC0), + tROW(0xD0), + tROW(0xE0), + tROW(0xF0), +}; + +static constexpr bool fISO_8859_16 = false; +static constexpr uint16_t tISO_8859_16[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7, 0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B, + 0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7, 0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C, + 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A, 0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF, + 0x00E0, 0x00E1, 
0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B, 0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF +}; + +static constexpr bool fKOI8_R = false; +static constexpr uint16_t tKOI8_R[256] = { + tASCII, + 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590, + 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248, 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7, + 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E, + 0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9, + 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, + 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, + 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, + 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A +}; + +static constexpr bool fKOI8_U = false; +static constexpr uint16_t tKOI8_U[256] = { + tASCII, + 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590, + 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248, 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7, + 0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457, 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x045E, 0x255E, + 0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407, 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x040E, 0x00A9, 
+ 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, + 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, + 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, + 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A +}; + +static constexpr bool fMacintosh = false; +static constexpr uint16_t tMacintosh[256] = { + tASCII, + 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1, 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8, + 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3, 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC, + 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF, 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8, + 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211, 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8, + 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153, + 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA, 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02, + 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1, 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4, + 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC, 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7 +}; + +static constexpr bool fWindows874 = true; +static constexpr uint16_t tWindows874[256] = { + tASCII, + 0x20AC, 0x0081, 0x0082, 0x0083, 0x0084, 0x2026, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x0098, 0x0099, 0x009A, 0x009B, 
0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, + 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17, 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F, + 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27, 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F, + 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37, 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F, + 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47, 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F, + 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD +}; + +static constexpr bool fWindows1250 = false; +static constexpr uint16_t tWindows1250[256] = { + tASCII, + 0x20AC, 0x0081, 0x201A, 0x0083, 0x201E, 0x2026, 0x2020, 0x2021, 0x0088, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179, + 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x0098, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A, + 0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B, + 0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C, + 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E, + 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF, + 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F, + 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9 +}; + +static constexpr bool 
fWindows1251 = false; +static constexpr uint16_t tWindows1251[256] = { + tASCII, + 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021, 0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F, + 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x0098, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F, + 0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7, 0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407, + 0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7, 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, + 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, + 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F +}; + +static constexpr bool fWindows1252 = false; +static constexpr uint16_t tWindows1252[256] = { + tASCII, + 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, + 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178, + tROW(0xA0), + tROW(0xB0), + tROW(0xC0), + tROW(0xD0), + tROW(0xE0), + tROW(0xF0), +}; + +static constexpr bool fWindows1253 = true; +static constexpr uint16_t tWindows1253[256] = { + tASCII, + 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x0088, 0x2030, 0x008A, 0x2039, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x0098, 0x2122, 0x009A, 0x203A, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 
0x00A6, 0x00A7, 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7, 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F, + 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F, + 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF, + 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF, + 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD +}; + +static constexpr bool fWindows1254 = false; +static constexpr uint16_t tWindows1254[256] = { + tASCII, + 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x008E, 0x008F, + 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x009E, 0x0178, + tROW(0xA0), + tROW(0xB0), + tROW(0xC0), + 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF, + tROW(0xE0), + 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF +}; + +static constexpr bool fWindows1255 = true; +static constexpr uint16_t tWindows1255[256] = { + tASCII, + 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x008A, 0x2039, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0x009A, 0x203A, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 
0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, + 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8, 0x05B9, 0x05BA, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF, + 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, + 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD +}; + +static constexpr bool fWindows1256 = false; +static constexpr uint16_t tWindows1256[256] = { + tASCII, + 0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688, + 0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA, + 0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x06BE, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F, + 0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F, + 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7, 0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643, + 0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A, 0x00EE, 0x00EF, + 0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7, 0x0651, 0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2 +}; + +static constexpr bool fWindows1257 = true; +static constexpr uint16_t tWindows1257[256] = { + tASCII, + 0x20AC, 0x0081, 0x201A, 0x0083, 0x201E, 0x2026, 0x2020, 0x2021, 0x0088, 0x2030, 0x008A, 0x2039, 0x008C, 0x00A8, 0x02C7, 0x00B8, + 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 
0x2022, 0x2013, 0x2014, 0x0098, 0x2122, 0x009A, 0x203A, 0x009C, 0x00AF, 0x02DB, 0x009F, + 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0xFFFD, 0x00A6, 0x00A7, 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6, + 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112, 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B, + 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7, 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF, + 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113, 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C, + 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9 +}; + +static constexpr bool fWindows1258 = false; +static constexpr uint16_t tWindows1258[256] = { + tASCII, + 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x008A, 0x2039, 0x0152, 0x008D, 0x008E, 0x008F, + 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0x009A, 0x203A, 0x0153, 0x009D, 0x009E, 0x0178, + tROW(0xA0), + tROW(0xB0), + 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0300, 0x00CD, 0x00CE, 0x00CF, + 0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x01AF, 0x0303, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0301, 0x00ED, 0x00EE, 0x00EF, + 0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF +}; + +static constexpr bool fXMacCyrillic = false; +static constexpr uint16_t tXMacCyrillic[256] = { + tASCII, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 
0x041C, 0x041D, 0x041E, 0x041F, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, + 0x2020, 0x00B0, 0x0490, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x0406, 0x00AE, 0x00A9, 0x2122, 0x0402, 0x0452, 0x2260, 0x0403, 0x0453, + 0x221E, 0x00B1, 0x2264, 0x2265, 0x0456, 0x00B5, 0x0491, 0x0408, 0x0404, 0x0454, 0x0407, 0x0457, 0x0409, 0x0459, 0x040A, 0x045A, + 0x0458, 0x0405, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, 0x00BB, 0x2026, 0x00A0, 0x040B, 0x045B, 0x040C, 0x045C, 0x0455, + 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x201E, 0x040E, 0x045E, 0x040F, 0x045F, 0x2116, 0x0401, 0x0451, 0x044F, + 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, + 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x20AC +}; + +} + +// Matches the list of encodings in single-byte.js +const uint16_t *const tSingleByteEncodings[28] = { + tIBM866, tKOI8_R, tKOI8_U, tMacintosh, tXMacCyrillic, + tISO_8859_2, tISO_8859_3, tISO_8859_4, tISO_8859_5, tISO_8859_6, tISO_8859_7, + tISO_8859_8, tISO_8859_8, // second time for 8-i + tISO_8859_10, tISO_8859_13, tISO_8859_14, tISO_8859_15, tISO_8859_16, + tWindows874, tWindows1250, tWindows1251, tWindows1252, tWindows1253, + tWindows1254, tWindows1255, tWindows1256, tWindows1257, tWindows1258 +}; + +// Matches the list of encodings in single-byte.js +const bool fSingleByteEncodings[28] = { + fIBM866, fKOI8_R, fKOI8_U, fMacintosh, fXMacCyrillic, + fISO_8859_2, fISO_8859_3, fISO_8859_4, fISO_8859_5, fISO_8859_6, fISO_8859_7, + fISO_8859_8, fISO_8859_8, // second time for 8-i + fISO_8859_10, fISO_8859_13, fISO_8859_14, fISO_8859_15, fISO_8859_16, + fWindows874, fWindows1250, fWindows1251, fWindows1252, fWindows1253, + fWindows1254, fWindows1255, fWindows1256, fWindows1257, fWindows1258 +}; diff --git
a/src/string_bytes.cc b/src/string_bytes.cc index 03b5fd7ebe3816..15a998e2718543 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -660,4 +660,10 @@ MaybeLocal StringBytes::Encode(Isolate* isolate, return Encode(isolate, buf, len, encoding); } +MaybeLocal StringBytes::Raw(Isolate* isolate, + uint16_t* buf, + size_t buflen) { + return ExternTwoByteString::New(isolate, buf, buflen); +} + } // namespace node diff --git a/src/string_bytes.h b/src/string_bytes.h index 9949f508f83ffe..c05ad26f6330b0 100644 --- a/src/string_bytes.h +++ b/src/string_bytes.h @@ -95,6 +95,10 @@ class StringBytes { const char* buf, enum encoding encoding); + static v8::MaybeLocal Raw(v8::Isolate* isolate, + uint16_t* buf, + size_t buflen); + private: static size_t WriteUCS2(v8::Isolate* isolate, char* buf, diff --git a/test/parallel/test-bootstrap-modules.js b/test/parallel/test-bootstrap-modules.js index d69a299625d9f2..b050f5bffde04a 100644 --- a/test/parallel/test-bootstrap-modules.js +++ b/test/parallel/test-bootstrap-modules.js @@ -88,7 +88,6 @@ expected.beforePreExec = new Set([ 'NativeModule internal/process/signal', 'Internal Binding fs', 'NativeModule internal/encoding', - 'NativeModule internal/encoding/single-byte', 'NativeModule internal/blob', 'NativeModule internal/fs/utils', 'NativeModule fs',