Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions ext/json/ext/generator/generator.c
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,8 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)

ALWAYS_INLINE(static) char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
{
RBIMPL_ASSERT_OR_ASSUME(len < vec_len);

// Flush the buffer so everything up until the last 'len' characters are unflushed.
search_flush(search);

Expand All @@ -297,12 +299,18 @@ ALWAYS_INLINE(static) char *copy_remaining_bytes(search_state *search, unsigned
char *s = (buf->ptr + buf->len);

// Pad the buffer with dummy characters that won't need escaping.
// This seem wateful at first sight, but memset of vector length is very fast.
memset(s, 'X', vec_len);
// This seems wasteful at first sight, but memset of vector length is very fast.
// This is a space as it can be directly represented as an immediate on AArch64.
memset(s, ' ', vec_len);

// Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
// to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
MEMCPY(s, search->ptr, char, len);
if (vec_len == 16) {
RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD);
json_fast_memcpy16(s, search->ptr, len);
} else {
MEMCPY(s, search->ptr, char, len);
}

return s;
}
Expand Down
4 changes: 4 additions & 0 deletions ext/json/ext/json.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
#include "ruby/encoding.h"
#include <stdint.h>

// If no earlier header has defined RBIMPL_ASSERT_OR_ASSUME, define it here so that
// it expands to nothing: call sites such as `RBIMPL_ASSERT_OR_ASSUME(cond);` then
// compile to an empty statement and the condition is neither checked nor assumed.
#ifndef RBIMPL_ASSERT_OR_ASSUME
# define RBIMPL_ASSERT_OR_ASSUME(x)
#endif

#if defined(RUBY_DEBUG) && RUBY_DEBUG
# define JSON_ASSERT RUBY_ASSERT
#else
Expand Down
29 changes: 28 additions & 1 deletion ext/json/ext/simd/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,34 @@ static inline int trailing_zeros(int input)

#ifdef JSON_ENABLE_SIMD

#define SIMD_MINIMUM_THRESHOLD 6
#define SIMD_MINIMUM_THRESHOLD 4

// Detect __builtin_memcpy support. The checks are nested on purpose: writing
// `#if defined(__has_builtin) && __has_builtin(...)` on a single line is a hard
// preprocessing error on compilers that lack __has_builtin, because the whole
// expression is still parsed and `__has_builtin(...)` turns into `0(...)`.
#if defined(__has_builtin)
# if __has_builtin(__builtin_memcpy)
#  define JSON_HAVE_BUILTIN_MEMCPY 1
# endif
#endif

/*
 * Copy `len` bytes from `src` to `dst`.
 *
 * The caller must guarantee SIMD_MINIMUM_THRESHOLD (4) <= len < 16. The fast path
 * below always reads and writes 4 or 8 bytes at both ends of the range, so a
 * shorter `len` would make it access memory before `src` / `dst`.
 */
ALWAYS_INLINE(static) void json_fast_memcpy16(char *dst, const char *src, size_t len)
{
    RBIMPL_ASSERT_OR_ASSUME(len < 16);
    RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD); // 4
#ifdef JSON_HAVE_BUILTIN_MEMCPY
    // If __builtin_memcpy is available, use it to copy between SIMD_MINIMUM_THRESHOLD (4) and vec_len-1 (15) bytes.
    // These copies overlap. The first copy will copy the first 8 (or 4) bytes. The second copy will copy
    // the last 8 (or 4) bytes but overlap with the first copy. The overlapping bytes will be in the correct
    // position in both copies.

    // Please do not attempt to replace __builtin_memcpy with memcpy without profiling and/or looking at the
    // generated assembly. On clang specifically (tested on Apple clang version 17.0.0 (clang-1700.0.13.3)),
    // when using memcpy, the compiler will notice the only difference is a 4 or 8 and generate a conditional
    // select instruction instead of direct loads and stores with a branch. This ends up slower than the branch
    // plus two loads and stores generated when using __builtin_memcpy.
    if (len >= 8) {
        __builtin_memcpy(dst, src, 8);
        __builtin_memcpy(dst + len - 8, src + len - 8, 8);
    } else {
        __builtin_memcpy(dst, src, 4);
        __builtin_memcpy(dst + len - 4, src + len - 4, 4);
    }
#else
    // Portable fallback: a plain copy of exactly `len` bytes.
    MEMCPY(dst, src, char, len);
#endif
}

#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
#include <arm_neon.h>
Expand Down
16 changes: 16 additions & 0 deletions test/json/json_generator_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,22 @@ def test_backslash
json = '"\\nabc"'
assert_equal json, generate(data)
#
data = "\n"
json = '"\\n"'
assert_equal json, generate(data)
#
(0..16).each do |i|
data = ('a' * i) + "\n"
json = '"' + ('a' * i) + '\\n"'
assert_equal json, generate(data)
end
#
(0..16).each do |i|
data = "\n" + ('a' * i)
json = '"' + '\\n' + ('a' * i) + '"'
assert_equal json, generate(data)
end
#
data = ["'"]
json = '["\\\'"]'
assert_equal '["\'"]', generate(data)
Expand Down