Skip to content

Commit 6a46ae3

Browse files
committed
src: move all 1-byte encodings to native
1 parent efa5be8 commit 6a46ae3

File tree

8 files changed

+495
-159
lines changed

8 files changed

+495
-159
lines changed

lib/internal/encoding.js

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44
// https://encoding.spec.whatwg.org
55

66
const {
7+
ArrayPrototypeMap,
78
Boolean,
89
ObjectDefineProperties,
910
ObjectGetOwnPropertyDescriptors,
1011
ObjectSetPrototypeOf,
1112
ObjectValues,
13+
SafeArrayIterator,
1214
SafeMap,
1315
StringPrototypeSlice,
1416
Symbol,
@@ -32,8 +34,6 @@ const kFatal = Symbol('kFatal');
3234
const kUTF8FastPath = Symbol('kUTF8FastPath');
3335
const kIgnoreBOM = Symbol('kIgnoreBOM');
3436

35-
const { isSinglebyteEncoding, createSinglebyteDecoder } = require('internal/encoding/single-byte');
36-
3737
const {
3838
getConstructorOf,
3939
customInspectSymbol: inspect,
@@ -58,6 +58,7 @@ const {
5858
encodeIntoResults,
5959
encodeUtf8String,
6060
decodeUTF8,
61+
decodeSingleByte,
6162
} = binding;
6263

6364
function validateDecoder(obj) {
@@ -71,6 +72,47 @@ const CONVERTER_FLAGS_IGNORE_BOM = 0x4;
7172

7273
const empty = new FastBuffer();
7374

75+
// Supported legacy single-byte encodings. The position of each name in this
// list is the numeric id handed to the native decodeSingleByte() binding, so
// the order has to stay in sync with the native side under src/.
const singlebyteEncodingNames = [
  'ibm866',
  'koi8-r',
  'koi8-u',
  'macintosh',
  'x-mac-cyrillic',
  'iso-8859-2',
  'iso-8859-3',
  'iso-8859-4',
  'iso-8859-5',
  'iso-8859-6',
  'iso-8859-7',
  'iso-8859-8',
  'iso-8859-8-i',
  'iso-8859-10',
  'iso-8859-13',
  'iso-8859-14',
  'iso-8859-15',
  'iso-8859-16',
  'windows-874',
  'windows-1250',
  'windows-1251',
  'windows-1252',
  'windows-1253',
  'windows-1254',
  'windows-1255',
  'windows-1256',
  'windows-1257',
  'windows-1258',
  // Has to be last, special case: the native decoder treats the final index
  // separately from the table-driven encodings.
  'x-user-defined',
];

// Maps encoding name -> numeric id (its index in the list above).
const encodingsSinglebyte = new SafeMap(new SafeArrayIterator(
  ArrayPrototypeMap(singlebyteEncodingNames, (name, index) => [name, index])));
107+
108+
// Reports whether `enc` is one of the names registered in encodingsSinglebyte.
const isSinglebyteEncoding = (enc) => {
  return encodingsSinglebyte.has(enc);
};
109+
110+
/**
 * Builds a decoder function bound to one single-byte encoding.
 * @param {string} encoding Name that must be a key of encodingsSinglebyte.
 * @param {boolean} fatal Forwarded unchanged to the native binding as its
 *   third argument.
 * @returns {Function} A function taking the input buffer and returning the
 *   result of the native decodeSingleByte() call.
 * @throws {ERR_ENCODING_NOT_SUPPORTED} If `encoding` is not registered.
 */
function createSinglebyteDecoder(encoding, fatal) {
  if (!encodingsSinglebyte.has(encoding)) {
    throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
  }

  // Numeric id understood by the native side.
  const encodingIndex = encodingsSinglebyte.get(encoding);

  return (buf) => decodeSingleByte(buf, encodingIndex, fatal);
}
115+
74116
const encodings = new SafeMap([
75117
['unicode-1-1-utf-8', 'utf-8'],
76118
['unicode11utf8', 'utf-8'],
@@ -460,7 +502,7 @@ function makeTextDecoderICU() {
460502
validateDecoder(this);
461503
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
462504

463-
if (this[kMethod]) return this[kMethod](parseInput(input));
505+
if (this[kMethod]) return this[kMethod](input);
464506

465507
this[kUTF8FastPath] &&= !(options?.stream);
466508

lib/internal/encoding/single-byte.js

Lines changed: 0 additions & 155 deletions
This file was deleted.

src/encoding_binding.cc

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "encoding_binding.h"
2+
#include "encoding_singlebyte.h"
23
#include "ada.h"
34
#include "env-inl.h"
45
#include "node_errors.h"
@@ -379,6 +380,62 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
379380
}
380381
}
381382

383+
void BindingData::DecodeSingleByte(const FunctionCallbackInfo<Value>& args) {
384+
Environment* env = Environment::GetCurrent(args);
385+
386+
CHECK_GE(args.Length(), 2);
387+
388+
if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
389+
args[0]->IsArrayBufferView())) {
390+
return node::THROW_ERR_INVALID_ARG_TYPE(
391+
env->isolate(),
392+
"The \"list\" argument must be an instance of SharedArrayBuffer, "
393+
"ArrayBuffer or ArrayBufferView.");
394+
}
395+
396+
CHECK(args[1]->IsInt32());
397+
const int encoding = args[1].As<v8::Int32>()->Value();
398+
CHECK(encoding >= 0 && encoding < 29);
399+
400+
ArrayBufferViewContents<uint8_t> buffer(args[0]);
401+
const uint8_t* data = buffer.data();
402+
size_t length = buffer.length();
403+
404+
if (length == 0) return args.GetReturnValue().SetEmptyString();
405+
406+
if (!simdutf::validate_ascii_with_errors(reinterpret_cast<const char*>(data), length).error) {
407+
Local<Value> ret;
408+
if (StringBytes::Encode(env->isolate(), reinterpret_cast<const char*>(data), length, LATIN1).ToLocal(&ret)) {
409+
args.GetReturnValue().Set(ret);
410+
}
411+
return;
412+
}
413+
414+
uint16_t* dst = node::UncheckedMalloc<uint16_t>(length);
415+
416+
if (encoding == 28) {
417+
// x-user-defined
418+
for (size_t i = 0; i < length; i++) dst[i] = data[i] >= 0x80 ? data[i] + 0xf700 : data[i];
419+
} else {
420+
bool has_fatal = args[2]->IsTrue();
421+
422+
const uint16_t* table = tSingleByteEncodings[encoding];
423+
for (size_t i = 0; i < length; i++) dst[i] = table[data[i]];
424+
425+
if (has_fatal && fSingleByteEncodings[encoding] &&
426+
simdutf::find(reinterpret_cast<char16_t*>(dst), reinterpret_cast<char16_t*>(dst) + length, 0xfffd) != reinterpret_cast<char16_t*>(dst) + length
427+
) {
428+
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
429+
env->isolate(), "The encoded data was not valid for this encoding");
430+
}
431+
}
432+
433+
Local<Value> ret;
434+
if (StringBytes::Raw(env->isolate(), dst, length).ToLocal(&ret)) {
435+
args.GetReturnValue().Set(ret);
436+
}
437+
}
438+
382439
void BindingData::ToASCII(const FunctionCallbackInfo<Value>& args) {
383440
Environment* env = Environment::GetCurrent(args);
384441
CHECK_GE(args.Length(), 1);
@@ -411,6 +468,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
411468
SetMethod(isolate, target, "encodeInto", EncodeInto);
412469
SetMethodNoSideEffect(isolate, target, "encodeUtf8String", EncodeUtf8String);
413470
SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8);
471+
SetMethodNoSideEffect(isolate, target, "decodeSingleByte", DecodeSingleByte);
414472
SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII);
415473
SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode);
416474
}
@@ -428,6 +486,7 @@ void BindingData::RegisterTimerExternalReferences(
428486
registry->Register(EncodeInto);
429487
registry->Register(EncodeUtf8String);
430488
registry->Register(DecodeUTF8);
489+
registry->Register(DecodeSingleByte);
431490
registry->Register(ToASCII);
432491
registry->Register(ToUnicode);
433492
}

src/encoding_binding.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class BindingData : public SnapshotableObject {
3131
static void EncodeInto(const v8::FunctionCallbackInfo<v8::Value>& args);
3232
static void EncodeUtf8String(const v8::FunctionCallbackInfo<v8::Value>& args);
3333
static void DecodeUTF8(const v8::FunctionCallbackInfo<v8::Value>& args);
34+
static void DecodeSingleByte(const v8::FunctionCallbackInfo<v8::Value>& args);
3435

3536
static void ToASCII(const v8::FunctionCallbackInfo<v8::Value>& args);
3637
static void ToUnicode(const v8::FunctionCallbackInfo<v8::Value>& args);

0 commit comments

Comments
 (0)