diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js index 61f48f3395fba7..f0329822d53c8a 100644 --- a/lib/internal/encoding.js +++ b/lib/internal/encoding.js @@ -4,31 +4,34 @@ // https://encoding.spec.whatwg.org const { + ArrayPrototypeMap, Boolean, ObjectDefineProperties, ObjectGetOwnPropertyDescriptors, ObjectSetPrototypeOf, ObjectValues, + SafeArrayIterator, SafeMap, StringPrototypeSlice, Symbol, SymbolToStringTag, - Uint8Array, } = primordials; +const { FastBuffer } = require('internal/buffer'); + const { ERR_ENCODING_NOT_SUPPORTED, ERR_INVALID_ARG_TYPE, ERR_INVALID_THIS, ERR_NO_ICU, } = require('internal/errors').codes; +const kMethod = Symbol('method'); const kHandle = Symbol('handle'); const kFlags = Symbol('flags'); const kEncoding = Symbol('encoding'); const kDecoder = Symbol('decoder'); const kFatal = Symbol('kFatal'); const kUTF8FastPath = Symbol('kUTF8FastPath'); -const kWindows1252FastPath = Symbol('kWindows1252FastPath'); const kIgnoreBOM = Symbol('kIgnoreBOM'); const { @@ -55,11 +58,9 @@ const { encodeIntoResults, encodeUtf8String, decodeUTF8, - decodeWindows1252, + decodeSingleByte, } = binding; -const { Buffer } = require('buffer'); - function validateDecoder(obj) { if (obj == null || obj[kDecoder] !== true) throw new ERR_INVALID_THIS('TextDecoder'); @@ -69,7 +70,48 @@ const CONVERTER_FLAGS_FLUSH = 0x1; const CONVERTER_FLAGS_FATAL = 0x2; const CONVERTER_FLAGS_IGNORE_BOM = 0x4; -const empty = new Uint8Array(0); +const empty = new FastBuffer(); + +// Has to be synced with src/ +const encodingsSinglebyte = new SafeMap(new SafeArrayIterator(ArrayPrototypeMap([ + 'ibm866', + 'koi8-r', + 'koi8-u', + 'macintosh', + 'x-mac-cyrillic', + 'iso-8859-2', + 'iso-8859-3', + 'iso-8859-4', + 'iso-8859-5', + 'iso-8859-6', + 'iso-8859-7', + 'iso-8859-8', + 'iso-8859-8-i', + 'iso-8859-10', + 'iso-8859-13', + 'iso-8859-14', + 'iso-8859-15', + 'iso-8859-16', + 'windows-874', + 'windows-1250', + 'windows-1251', + 'windows-1252', + 'windows-1253', + 'windows-1254', + 'windows-1255', + 'windows-1256', + 'windows-1257', + 'windows-1258', + 'x-user-defined', // Has to be last, special case +], (e, i) => [e, i]))); + +const isSinglebyteEncoding = (enc) => encodingsSinglebyte.has(enc); + +function createSinglebyteDecoder(encoding, fatal) { + const key = encodingsSinglebyte.get(encoding); + if (key === undefined) throw new ERR_ENCODING_NOT_SUPPORTED(encoding); + return (buf) => decodeSingleByte(buf, key, fatal); +} const encodings = new SafeMap([ ['unicode-1-1-utf-8', 'utf-8'], @@ -387,6 +429,24 @@ ObjectDefineProperties( [SymbolToStringTag]: { __proto__: null, configurable: true, value: 'TextEncoder' }, }); +function parseInput(input) { + if (isAnyArrayBuffer(input)) { + try { + return new FastBuffer(input); + } catch { + return empty; + } + } else if (isArrayBufferView(input)) { + try { + return new FastBuffer(input.buffer, input.byteOffset, input.byteLength); + } catch { + return empty; + } + } else { + throw new ERR_INVALID_ARG_TYPE('input', ['ArrayBuffer', 'ArrayBufferView'], input); + } +} + const TextDecoder = internalBinding('config').hasIntl ? makeTextDecoderICU() : @@ -420,10 +480,12 @@ function makeTextDecoderICU() { this[kFatal] = Boolean(options?.fatal); // Only support fast path for UTF-8. this[kUTF8FastPath] = enc === 'utf-8'; - this[kWindows1252FastPath] = enc === 'windows-1252'; this[kHandle] = undefined; + this[kMethod] = undefined; - if (!this[kUTF8FastPath] && !this[kWindows1252FastPath]) { + if (isSinglebyteEncoding(this.encoding)) { + this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]); + } else if (!this[kUTF8FastPath]) { this.#prepareConverter(); } } @@ -438,22 +500,18 @@ function makeTextDecoderICU() { decode(input = empty, options = kEmptyObject) { validateDecoder(this); + validateObject(options, 'options', kValidateObjectAllowObjectsAndNull); + + if (this[kMethod]) return this[kMethod](input); this[kUTF8FastPath] &&= !(options?.stream); - this[kWindows1252FastPath] &&= !(options?.stream); if (this[kUTF8FastPath]) { return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]); } - if (this[kWindows1252FastPath]) { - return decodeWindows1252(input, this[kIgnoreBOM], this[kFatal]); - } - this.#prepareConverter(); - validateObject(options, 'options', kValidateObjectAllowObjectsAndNull); - let flags = 0; if (options !== null) flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH; @@ -476,7 +534,7 @@ function makeTextDecoderJS() { const kBOMSeen = Symbol('BOM seen'); function hasConverter(encoding) { - return encoding === 'utf-8' || encoding === 'utf-16le'; + return encoding === 'utf-8' || encoding === 'utf-16le' || isSinglebyteEncoding(encoding); } class TextDecoder { @@ -502,30 +560,20 @@ function makeTextDecoderJS() { this[kFlags] = flags; this[kEncoding] = enc; this[kBOMSeen] = false; + this[kMethod] = undefined; + + if (isSinglebyteEncoding(this.encoding)) { + this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]); + } } decode(input = empty, options = kEmptyObject) { validateDecoder(this); - if (isAnyArrayBuffer(input)) { - try { - input = Buffer.from(input); - } catch { - input = empty; - } - } else if (isArrayBufferView(input)) { - try { - input = Buffer.from(input.buffer, input.byteOffset, - input.byteLength); - } catch { - input = empty; - } - } else { - throw new ERR_INVALID_ARG_TYPE('input', - ['ArrayBuffer', 'ArrayBufferView'], - input); - } + input = parseInput(input); validateObject(options, 'options', kValidateObjectAllowObjectsAndNull); + if (this[kMethod]) return this[kMethod](input); + if (this[kFlags] & CONVERTER_FLAGS_FLUSH) { this[kBOMSeen] = false; } diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc index f68dd9522a0f69..cb902924661af4 100644 --- a/src/encoding_binding.cc +++ b/src/encoding_binding.cc @@ -1,7 +1,7 @@ #include "encoding_binding.h" +#include "encoding_singlebyte.h" #include "ada.h" #include "env-inl.h" -#include "node_buffer.h" #include "node_errors.h" #include "node_external_reference.h" #include "simdutf.h" @@ -380,6 +380,62 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo& args) { } } +void BindingData::DecodeSingleByte(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + + CHECK_GE(args.Length(), 2); + + if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() || + args[0]->IsArrayBufferView())) { + return node::THROW_ERR_INVALID_ARG_TYPE( + env->isolate(), + "The \"list\" argument must be an instance of SharedArrayBuffer, " + "ArrayBuffer or ArrayBufferView."); + } + + CHECK(args[1]->IsInt32()); + const int encoding = args[1].As()->Value(); + CHECK(encoding >= 0 && encoding < 29); + + ArrayBufferViewContents buffer(args[0]); + const uint8_t* data = buffer.data(); + size_t length = buffer.length(); + + if (length == 0) return args.GetReturnValue().SetEmptyString(); + + if (!simdutf::validate_ascii_with_errors(reinterpret_cast(data), length).error) { + Local ret; + if (StringBytes::Encode(env->isolate(), reinterpret_cast(data), length, LATIN1).ToLocal(&ret)) { + args.GetReturnValue().Set(ret); + } + return; + } + + uint16_t* dst = node::UncheckedMalloc(length); + + if (encoding == 28) { + // x-user-defined + for (size_t i = 0; i < length; i++) dst[i] = data[i] >= 0x80 ? data[i] + 0xf700 : data[i]; + } else { + bool has_fatal = args[2]->IsTrue(); + + const uint16_t* table = tSingleByteEncodings[encoding]; + for (size_t i = 0; i < length; i++) dst[i] = table[data[i]]; + + if (has_fatal && fSingleByteEncodings[encoding] && + simdutf::find(reinterpret_cast(dst), reinterpret_cast(dst) + length, 0xfffd) != reinterpret_cast(dst) + length + ) { + return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA( + env->isolate(), "The encoded data was not valid for this encoding"); + } + } + + Local ret; + if (StringBytes::Raw(env->isolate(), dst, length).ToLocal(&ret)) { + args.GetReturnValue().Set(ret); + } +} + void BindingData::ToASCII(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); CHECK_GE(args.Length(), 1); @@ -412,10 +468,9 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data, SetMethod(isolate, target, "encodeInto", EncodeInto); SetMethodNoSideEffect(isolate, target, "encodeUtf8String", EncodeUtf8String); SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8); + SetMethodNoSideEffect(isolate, target, "decodeSingleByte", DecodeSingleByte); SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII); SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode); - SetMethodNoSideEffect( - isolate, target, "decodeWindows1252", DecodeWindows1252); } void BindingData::CreatePerContextProperties(Local target, @@ -431,79 +486,9 @@ void BindingData::RegisterTimerExternalReferences( registry->Register(EncodeInto); registry->Register(EncodeUtf8String); registry->Register(DecodeUTF8); + registry->Register(DecodeSingleByte); registry->Register(ToASCII); registry->Register(ToUnicode); - registry->Register(DecodeWindows1252); -} - -void BindingData::DecodeWindows1252(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - - CHECK_GE(args.Length(), 1); - if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() || - args[0]->IsArrayBufferView())) { - return node::THROW_ERR_INVALID_ARG_TYPE( - env->isolate(), - "The \"input\" argument must be an instance of ArrayBuffer, " - "SharedArrayBuffer, or ArrayBufferView."); - } - - bool ignore_bom = args[1]->IsTrue(); - - ArrayBufferViewContents buffer(args[0]); - const uint8_t* data = buffer.data(); - size_t length = buffer.length(); - - if (ignore_bom && length > 0 && data[0] == 0xFF) { - data++; - length--; - } - - if (length == 0) { - return args.GetReturnValue().SetEmptyString(); - } - - // Windows-1252 specific mapping for bytes 128-159 - // These differ from Latin-1/ISO-8859-1 - static const uint16_t windows1252_mapping[32] = { - 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87 - 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F - 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97 - 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178 // 98-9F - }; - - std::string result; - result.reserve(length * 3); // Reserve space for UTF-8 output - - for (size_t i = 0; i < length; i++) { - uint8_t byte = data[i]; - uint32_t codepoint; - - // Check if byte is in the special Windows-1252 range (128-159) - if (byte >= 0x80 && byte <= 0x9F) { - codepoint = windows1252_mapping[byte - 0x80]; - } else { - // For all other bytes, Windows-1252 is identical to Latin-1 - codepoint = byte; - } - - // Convert codepoint to UTF-8 - if (codepoint < 0x80) { - result.push_back(static_cast(codepoint)); - } else if (codepoint < 0x800) { - result.push_back(static_cast(0xC0 | (codepoint >> 6))); - result.push_back(static_cast(0x80 | (codepoint & 0x3F))); - } else { - result.push_back(static_cast(0xE0 | (codepoint >> 12))); - result.push_back(static_cast(0x80 | ((codepoint >> 6) & 0x3F))); - result.push_back(static_cast(0x80 | (codepoint & 0x3F))); - } - } - - Local ret; - if (ToV8Value(env->context(), result, env->isolate()).ToLocal(&ret)) { - args.GetReturnValue().Set(ret); - } } } // namespace encoding_binding diff --git a/src/encoding_binding.h b/src/encoding_binding.h index 8393702cce855f..c1eae9472878d0 100644 --- a/src/encoding_binding.h +++ b/src/encoding_binding.h @@ -31,8 +31,7 @@ class BindingData : public SnapshotableObject { static void EncodeInto(const v8::FunctionCallbackInfo& args); static void EncodeUtf8String(const v8::FunctionCallbackInfo& args); static void DecodeUTF8(const v8::FunctionCallbackInfo& args); - static void DecodeWindows1252( - const v8::FunctionCallbackInfo& args); + static void DecodeSingleByte(const v8::FunctionCallbackInfo& args); static void ToASCII(const v8::FunctionCallbackInfo& args); static void ToUnicode(const v8::FunctionCallbackInfo& args); diff --git a/src/encoding_singlebyte.h b/src/encoding_singlebyte.h new file mode 100644 index 00000000000000..7c8b1c87ad9e56 --- /dev/null +++ b/src/encoding_singlebyte.h @@ -0,0 +1,380 @@ +// Generated from https://encoding.spec.whatwg.org/#legacy-single-byte-encodings +// Mapping for 0x80-0xFF, 0xFFFD designates unmapped offset (replacement) +// Flag is whether the table contains 0xFFFD or not +namespace { + +#define tROW(x) x, x+1, x+2, x+3, x+4, x+5, x+6, x+7, x+8, x+9, x+10, x+11, x+12, x+13, x+14, x+15 +#define tASCII tROW(0x00), tROW(0x10), tROW(0x20), tROW(0x30), tROW(0x40), tROW(0x50), tROW(0x60), tROW(0x70) + +static constexpr bool fIBM866 = false; +static constexpr uint16_t tIBM866[256] = { + tASCII, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, + 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510, + 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F, 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567, + 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B, 0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580, + 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F, + 0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040E, 0x045E, 0x00B0, 0x2219, 0x00B7, 0x221A, 0x2116, 0x00A4, 0x25A0, 0x00A0 +}; + +static constexpr bool fISO_8859_2 = false; +static constexpr uint16_t tISO_8859_2[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7, 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B, + 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7, 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C, + 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E, + 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF, + 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F, + 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9 +}; + +static constexpr bool fISO_8859_3 = true; +static constexpr uint16_t tISO_8859_3[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7, 0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0xFFFD, 0x017B, + 0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7, 0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0xFFFD, 0x017C, + 0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7, 0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7, 0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9 +}; + +static constexpr bool fISO_8859_4 = false; +static constexpr uint16_t tISO_8859_4[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7, 0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF, + 0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7, 0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B, + 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A, + 0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF, + 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B, + 0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9 +}; + +static constexpr bool fISO_8859_5 = false; +static constexpr uint16_t tISO_8859_5[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, + 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, + 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F, + 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F +}; + +static constexpr bool fISO_8859_6 = true; +static constexpr uint16_t tISO_8859_6[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x060C, 0x00AD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 0xFFFD, 0xFFFD, 0xFFFD, 0x061F, + 0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F, + 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, 0x0638, 0x0639, 0x063A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, 0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F, + 0x0650, 0x0651, 0x0652, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD +}; + +static constexpr bool fISO_8859_7 = true; +static constexpr uint16_t tISO_8859_7[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0x2018, 0x2019, 0x00A3, 0x20AC, 0x20AF, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x037A, 0x00AB, 0x00AC, 0x00AD, 0xFFFD, 0x2015, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7, 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F, + 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F, + 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF, + 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF, + 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD +}; + +static constexpr bool fISO_8859_8 = true; +static constexpr uint16_t tISO_8859_8[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017, + 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, + 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD +}; + +static constexpr bool fISO_8859_10 = false; +static constexpr uint16_t tISO_8859_10[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7, 0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A, + 0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7, 0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B, + 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF, + 0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168, 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, + 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF, + 0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169, 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138 +}; + +static constexpr bool fISO_8859_13 = false; +static constexpr uint16_t tISO_8859_13[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7, 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7, 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6, + 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112, 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B, + 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7, 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF, + 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113, 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C, + 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019 +}; + +static constexpr bool fISO_8859_14 = false; +static constexpr uint16_t tISO_8859_14[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7, 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178, + 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56, 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61, + tROW(0xC0), + 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF, + tROW(0xE0), + 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF +}; + +static constexpr bool fISO_8859_15 = false; +static constexpr uint16_t tISO_8859_15[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7, 0x0161, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6, 0x00B7, 0x017E, 0x00B9, 0x00BA, 0x00BB, 0x0152, 0x0153, 0x0178, 0x00BF, + tROW(0xC0), + tROW(0xD0), + tROW(0xE0), + tROW(0xF0), +}; + +static constexpr bool fISO_8859_16 = false; +static constexpr uint16_t tISO_8859_16[256] = { + tASCII, + tROW(0x80), + tROW(0x90), + 0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7, 0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B, + 0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7, 0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C, + 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A, 0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B, 0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF +}; + +static constexpr bool fKOI8_R = false; +static constexpr uint16_t tKOI8_R[256] = { + tASCII, + 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590, + 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248, 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7, + 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E, + 0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9, + 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, + 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, + 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, + 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A +}; + +static constexpr bool fKOI8_U = false; +static constexpr uint16_t tKOI8_U[256] = { + tASCII, + 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590, + 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248, 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7, + 0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457, 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x045E, 0x255E, + 0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407, 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x040E, 0x00A9, + 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, + 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, + 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, + 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A +}; + +static constexpr bool fMacintosh = false; +static constexpr uint16_t tMacintosh[256] = { + tASCII, + 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1, 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8, + 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3, 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC, + 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF, 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8, + 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211, 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8, + 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153, + 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA, 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02, + 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1, 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4, + 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC, 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7 +}; + +static constexpr bool fWindows874 = true; +static constexpr uint16_t tWindows874[256] = { + tASCII, + 0x20AC, 0x0081, 0x0082, 0x0083, 0x0084, 0x2026, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, + 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17, 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F, + 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27, 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F, + 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37, 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F, + 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47, 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F, + 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD +}; + +static constexpr bool fWindows1250 = false; +static constexpr uint16_t tWindows1250[256] = { + tASCII, + 0x20AC, 0x0081, 0x201A, 0x0083, 0x201E, 0x2026, 0x2020, 0x2021, 0x0088, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179, + 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x0098, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A, + 0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B, + 0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C, + 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E, + 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF, + 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F, + 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9 +}; + +static constexpr bool fWindows1251 = false; +static constexpr uint16_t tWindows1251[256] = { + tASCII, + 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021, 0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F, + 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x0098, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F, + 0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7, 0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407, + 0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7, 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, + 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, + 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F +}; + +static constexpr bool fWindows1252 = false; +static constexpr uint16_t tWindows1252[256] = { + tASCII, + 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, + 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178, + tROW(0xA0), + tROW(0xB0), + tROW(0xC0), + tROW(0xD0), + tROW(0xE0), + tROW(0xF0), +}; + +static constexpr bool fWindows1253 = true; +static constexpr uint16_t tWindows1253[256] = { + tASCII, + 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x0088, 0x2030, 0x008A, 0x2039, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x0098, 0x2122, 0x009A, 0x203A, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7, 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F, + 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F, + 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF, + 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF, + 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD +}; + +static constexpr bool fWindows1254 = false; +static constexpr uint16_t tWindows1254[256] = { + tASCII, + 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x008E, 0x008F, + 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x009E, 0x0178, + tROW(0xA0), + tROW(0xB0), + tROW(0xC0), + 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF, + tROW(0xE0), + 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF +}; + +static constexpr bool fWindows1255 = true; +static constexpr uint16_t tWindows1255[256] = { + tASCII, + 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x008A, 0x2039, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0x009A, 0x203A, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, + 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8, 0x05B9, 0x05BA, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF, + 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, + 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD +}; + +static constexpr bool fWindows1256 = false; +static constexpr uint16_t tWindows1256[256] = { + tASCII, + 0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688, + 0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA, + 0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x06BE, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F, + 0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F, + 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7, 0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643, + 0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A, 0x00EE, 0x00EF, + 0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7, 0x0651, 0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2 +}; + +static constexpr bool fWindows1257 = true; +static constexpr uint16_t tWindows1257[256] = { + tASCII, + 0x20AC, 0x0081, 0x201A, 0x0083, 0x201E, 0x2026, 0x2020, 0x2021, 0x0088, 0x2030, 0x008A, 0x2039, 0x008C, 0x00A8, 0x02C7, 0x00B8, + 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x0098, 0x2122, 0x009A, 0x203A, 0x009C, 0x00AF, 0x02DB, 0x009F, + 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0xFFFD, 0x00A6, 0x00A7, 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6, + 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112, 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B, + 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7, 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF, + 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113, 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C, + 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9 +}; + +static constexpr bool fWindows1258 = false; +static constexpr uint16_t tWindows1258[256] = { + tASCII, + 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x008A, 0x2039, 0x0152, 0x008D, 0x008E, 0x008F, + 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0x009A, 0x203A, 0x0153, 0x009D, 0x009E, 0x0178, + tROW(0xA0), + tROW(0xB0), + 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0300, 0x00CD, 0x00CE, 0x00CF, + 0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x01AF, 0x0303, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0301, 0x00ED, 0x00EE, 0x00EF, + 0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF +}; + +static constexpr bool fXMacCyrillic = false; +static constexpr uint16_t tXMacCyrillic[256] = { + tASCII, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, + 0x2020, 0x00B0, 0x0490, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x0406, 0x00AE, 0x00A9, 0x2122, 0x0402, 0x0452, 0x2260, 0x0403, 0x0453, + 0x221E, 0x00B1, 0x2264, 0x2265, 0x0456, 0x00B5, 0x0491, 0x0408, 0x0404, 0x0454, 0x0407, 0x0457, 0x0409, 0x0459, 0x040A, 0x045A, + 0x0458, 0x0405, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, 0x00BB, 0x2026, 0x00A0, 0x040B, 0x045B, 0x040C, 0x045C, 0x0455, + 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x201E, 0x040E, 0x045E, 0x040F, 0x045F, 0x2116, 0x0401, 0x0451, 0x044F, + 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, + 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x20AC +}; + +} + +// Matches the list of encoding sin single-byte.js +const uint16_t *const tSingleByteEncodings[28] = { + tIBM866, tKOI8_R, tKOI8_U, tMacintosh, tXMacCyrillic, + tISO_8859_2, tISO_8859_3, tISO_8859_4, tISO_8859_5, tISO_8859_6, tISO_8859_7, + tISO_8859_8, tISO_8859_8, // second time for for 8-i + tISO_8859_10, tISO_8859_13, tISO_8859_14, tISO_8859_15, tISO_8859_16, + tWindows874, tWindows1250, tWindows1251, tWindows1252, tWindows1253, + tWindows1254, tWindows1255, tWindows1256, tWindows1257, tWindows1258 +}; + +// Matches the list of encoding sin single-byte.js +const bool fSingleByteEncodings[28] = { + fIBM866, fKOI8_R, fKOI8_U, fMacintosh, fXMacCyrillic, + fISO_8859_2, fISO_8859_3, fISO_8859_4, fISO_8859_5, fISO_8859_6, fISO_8859_7, + fISO_8859_8, fISO_8859_8, // second time for for 8-i + fISO_8859_10, fISO_8859_13, fISO_8859_14, fISO_8859_15, fISO_8859_16, + fWindows874, fWindows1250, fWindows1251, fWindows1252, fWindows1253, + fWindows1254, fWindows1255, fWindows1256, fWindows1257, fWindows1258 +}; diff --git a/src/string_bytes.cc b/src/string_bytes.cc index 03b5fd7ebe3816..15a998e2718543 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -660,4 +660,10 @@ MaybeLocal StringBytes::Encode(Isolate* isolate, return Encode(isolate, buf, len, encoding); } +MaybeLocal StringBytes::Raw(Isolate* isolate, + uint16_t* buf, + size_t buflen) { + return ExternTwoByteString::New(isolate, buf, buflen); +} + } // namespace node diff --git a/src/string_bytes.h b/src/string_bytes.h index 9949f508f83ffe..c05ad26f6330b0 100644 --- a/src/string_bytes.h +++ b/src/string_bytes.h @@ -95,6 +95,10 @@ class StringBytes { const char* buf, enum encoding encoding); + static v8::MaybeLocal Raw(v8::Isolate* isolate, + uint16_t* buf, + size_t buflen); + private: static size_t WriteUCS2(v8::Isolate* isolate, char* buf, diff --git a/test/parallel/test-internal-encoding-binding.js b/test/parallel/test-internal-encoding-binding.js deleted file mode 100644 index 7d5397d213c205..00000000000000 --- a/test/parallel/test-internal-encoding-binding.js +++ /dev/null @@ -1,53 +0,0 @@ -// Flags: --expose-internals - -'use strict'; - -require('../common'); - -const assert = require('node:assert'); -const { internalBinding } = require('internal/test/binding'); -const binding = internalBinding('encoding_binding'); - -// Windows-1252 specific tests -{ - // Test Windows-1252 special characters in 128-159 range - // These differ from Latin-1 - assert.strictEqual(binding.decodeWindows1252(Uint8Array.of(0x80), false, false), '€'); - assert.strictEqual(binding.decodeWindows1252(Uint8Array.of(0x82), false, false), '‚'); - assert.strictEqual(binding.decodeWindows1252(Uint8Array.of(0x83), false, false), 'ƒ'); - assert.strictEqual(binding.decodeWindows1252(Uint8Array.of(0x9F), false, false), 'Ÿ'); -} - -{ - // Test Windows-1252 characters outside 128-159 range (same as Latin-1) - const buf = Uint8Array.from([0xC1, 0xE9, 0xF3]); - assert.strictEqual(binding.decodeWindows1252(buf, false, false), 'Áéó'); -} - -{ - // Empty input - const buf = Uint8Array.from([]); - assert.strictEqual(binding.decodeWindows1252(buf, false, false), ''); -} - -// Windows-1252 specific tests -{ - // Test Windows-1252 special characters in 128-159 range - // These differ from Latin-1 - assert.strictEqual(binding.decodeWindows1252(Uint8Array.of(0x80), false, false), '€'); - assert.strictEqual(binding.decodeWindows1252(Uint8Array.of(0x82), false, false), '‚'); - assert.strictEqual(binding.decodeWindows1252(Uint8Array.of(0x83), false, false), 'ƒ'); - assert.strictEqual(binding.decodeWindows1252(Uint8Array.of(0x9F), false, false), 'Ÿ'); -} - -{ - // Test Windows-1252 characters outside 128-159 range (same as Latin-1) - const buf = Uint8Array.from([0xC1, 0xE9, 0xF3]); - assert.strictEqual(binding.decodeWindows1252(buf, false, false), 'Áéó'); -} - -{ - // Empty input - const buf = Uint8Array.from([]); - assert.strictEqual(binding.decodeWindows1252(buf, false, false), ''); -} diff --git a/typings/internalBinding/encoding_binding.d.ts b/typings/internalBinding/encoding_binding.d.ts index 6833c9ac0557b1..0774a21f25e21f 100644 --- a/typings/internalBinding/encoding_binding.d.ts +++ b/typings/internalBinding/encoding_binding.d.ts @@ -4,5 +4,4 @@ export interface EncodingBinding { decodeUTF8(buffer: ArrayBufferView | ArrayBuffer | SharedArrayBuffer, ignoreBOM?: boolean, hasFatal?: boolean): string; toASCII(input: string): string; toUnicode(input: string): string; - decodeWindows1252(buffer: ArrayBufferView | ArrayBuffer | SharedArrayBuffer, ignoreBOM?: boolean, hasFatal?: boolean): string; }