diff --git a/doc/modules/ROOT/pages/bit.adoc b/doc/modules/ROOT/pages/bit.adoc index 2bf481a..f321073 100644 --- a/doc/modules/ROOT/pages/bit.adoc +++ b/doc/modules/ROOT/pages/bit.adoc @@ -26,7 +26,7 @@ For `u128`, the functions delegate to the `boost::int128` implementations. [source,c++] ---- template -[[nodiscard]] constexpr auto has_single_bit(UnsignedInt x) noexcept -> bool; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto has_single_bit(UnsignedInt x) noexcept -> bool; ---- Returns `true` if `x` is a power of two. @@ -37,7 +37,7 @@ See https://en.cppreference.com/w/cpp/numeric/has_single_bit.html[`std::has_sing [source,c++] ---- template -[[nodiscard]] constexpr auto bit_ceil(UnsignedInt x) noexcept -> UnsignedInt; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto bit_ceil(UnsignedInt x) noexcept -> UnsignedInt; ---- Returns the smallest power of two not less than `x`. @@ -49,7 +49,7 @@ See https://en.cppreference.com/w/cpp/numeric/bit_ceil.html[`std::bit_ceil`]. [source,c++] ---- template -[[nodiscard]] constexpr auto bit_floor(UnsignedInt x) noexcept -> UnsignedInt; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto bit_floor(UnsignedInt x) noexcept -> UnsignedInt; ---- Returns the largest power of two not greater than `x`. @@ -61,7 +61,7 @@ See https://en.cppreference.com/w/cpp/numeric/bit_floor.html[`std::bit_floor`]. [source,c++] ---- template -[[nodiscard]] constexpr auto bit_width(UnsignedInt x) noexcept -> int; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto bit_width(UnsignedInt x) noexcept -> int; ---- Returns the number of bits needed to represent `x` (i.e., 1 + floor(log2(x)) for x > 0, or 0 for x == 0). @@ -74,7 +74,7 @@ See https://en.cppreference.com/w/cpp/numeric/bit_width.html[`std::bit_width`]. 
[source,c++] ---- template -[[nodiscard]] constexpr auto rotl(UnsignedInt x, int s) noexcept -> UnsignedInt; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto rotl(UnsignedInt x, int s) noexcept -> UnsignedInt; ---- Computes the result of bitwise left-rotating `x` by `s` positions. @@ -87,7 +87,7 @@ NOTE: `rotl` is not available for `bounded_uint` types. Bit rotation can produce [source,c++] ---- template -[[nodiscard]] constexpr auto rotr(UnsignedInt x, int s) noexcept -> UnsignedInt; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto rotr(UnsignedInt x, int s) noexcept -> UnsignedInt; ---- Computes the result of bitwise right-rotating `x` by `s` positions. @@ -102,7 +102,7 @@ NOTE: `rotr` is not available for `bounded_uint` types. Bit rotation can produce [source,c++] ---- template -[[nodiscard]] constexpr auto countl_zero(UnsignedInt x) noexcept -> int; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto countl_zero(UnsignedInt x) noexcept -> int; ---- Returns the number of consecutive 0-bits starting from the most significant bit. @@ -113,7 +113,7 @@ See https://en.cppreference.com/w/cpp/numeric/countl_zero.html[`std::countl_zero [source,c++] ---- template -[[nodiscard]] constexpr auto countl_one(UnsignedInt x) noexcept -> int; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto countl_one(UnsignedInt x) noexcept -> int; ---- Returns the number of consecutive 1-bits starting from the most significant bit. @@ -124,7 +124,7 @@ See https://en.cppreference.com/w/cpp/numeric/countl_one.html[`std::countl_one`] [source,c++] ---- template -[[nodiscard]] constexpr auto countr_zero(UnsignedInt x) noexcept -> int; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto countr_zero(UnsignedInt x) noexcept -> int; ---- Returns the number of consecutive 0-bits starting from the least significant bit. 
@@ -135,7 +135,7 @@ See https://en.cppreference.com/w/cpp/numeric/countr_zero.html[`std::countr_zero [source,c++] ---- template -[[nodiscard]] constexpr auto countr_one(UnsignedInt x) noexcept -> int; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto countr_one(UnsignedInt x) noexcept -> int; ---- Returns the number of consecutive 1-bits starting from the least significant bit. @@ -146,7 +146,7 @@ See https://en.cppreference.com/w/cpp/numeric/countr_one.html[`std::countr_one`] [source,c++] ---- template -[[nodiscard]] constexpr auto popcount(UnsignedInt x) noexcept -> int; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto popcount(UnsignedInt x) noexcept -> int; ---- Returns the number of 1-bits in `x`. @@ -159,7 +159,7 @@ See https://en.cppreference.com/w/cpp/numeric/popcount.html[`std::popcount`]. [source,c++] ---- template -[[nodiscard]] constexpr auto byteswap(Int x) noexcept -> Int; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto byteswap(Int x) noexcept -> Int; ---- Reverses the bytes of `x`. @@ -172,7 +172,7 @@ NOTE: `byteswap` is not available for `bounded_uint` types. Byte reversal can pr [source,c++] ---- template -[[nodiscard]] constexpr auto bitswap(Int x) noexcept -> Int; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto bitswap(Int x) noexcept -> Int; ---- Reverses all bits of `x`. 
diff --git a/doc/modules/ROOT/pages/byte_conversions.adoc b/doc/modules/ROOT/pages/byte_conversions.adoc index 152aa3c..5696e41 100644 --- a/doc/modules/ROOT/pages/byte_conversions.adoc +++ b/doc/modules/ROOT/pages/byte_conversions.adoc @@ -170,7 +170,7 @@ The value is first converted to big-endian byte order using `to_be`, then reinte [source,c++] ---- template -[[nodiscard]] constexpr auto to_be_bytes(const T value) noexcept -> std::array; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto to_be_bytes(const T value) noexcept -> std::array; ---- === Parameters @@ -203,7 +203,7 @@ The bytes are reinterpreted as the underlying type and then converted from big-e [source,c++] ---- template -[[nodiscard]] constexpr auto from_be_bytes(const std::span bytes) -> T; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto from_be_bytes(const std::span bytes) -> T; ---- === Parameters @@ -251,7 +251,7 @@ The value is first converted to little-endian byte order using `to_le`, then rei [source,c++] ---- template -[[nodiscard]] constexpr auto to_le_bytes(const T value) noexcept -> std::array; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto to_le_bytes(const T value) noexcept -> std::array; ---- === Parameters @@ -284,7 +284,7 @@ The bytes are reinterpreted as the underlying type and then converted from littl [source,c++] ---- template -[[nodiscard]] constexpr auto from_le_bytes(const std::span bytes) -> T; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto from_le_bytes(const std::span bytes) -> T; ---- === Parameters @@ -334,7 +334,7 @@ The result is equivalent to `std::bit_cast>(val [source,c++] ---- template -[[nodiscard]] constexpr auto to_ne_bytes(const T value) noexcept -> std::array; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto to_ne_bytes(const T value) noexcept -> std::array; ---- === Parameters @@ -368,7 +368,7 @@ Delegates to `from_le_bytes` on little-endian platforms and `from_be_bytes` on b [source,c++] ---- 
template -[[nodiscard]] constexpr auto from_ne_bytes(const std::span bytes) -> T; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto from_ne_bytes(const std::span bytes) -> T; ---- === Parameters diff --git a/doc/modules/ROOT/pages/charconv.adoc b/doc/modules/ROOT/pages/charconv.adoc index 599b073..6bc6af4 100644 --- a/doc/modules/ROOT/pages/charconv.adoc +++ b/doc/modules/ROOT/pages/charconv.adoc @@ -33,12 +33,14 @@ namespace boost::charconv { // Convert safe integer to character string template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto to_chars(char* first, char* last, T value, int base = 10) -> charconv::to_chars_result; // Convert character string to safe integer template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto from_chars(const char* first, const char* last, T& value, int base = 10) -> charconv::from_chars_result; @@ -57,8 +59,10 @@ struct to_chars_result char* ptr; std::errc ec; + BOOST_SAFE_NUMBERS_HOST_DEVICE friend constexpr bool operator==(const to_chars_result& lhs, const to_chars_result& rhs) noexcept = default; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr explicit operator bool() const noexcept { return ec == std::errc{}; } }; @@ -83,8 +87,11 @@ struct from_chars_result const char* ptr; std::errc ec; + BOOST_SAFE_NUMBERS_HOST_DEVICE friend constexpr bool operator==(const from_chars_result& lhs, const from_chars_result& rhs) noexcept = default; + + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr explicit operator bool() const noexcept { return ec == std::errc{}; } }; @@ -103,6 +110,7 @@ struct from_chars_result [source,c++] ---- template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto to_chars(char* first, char* last, T value, int base = 10) -> charconv::to_chars_result; @@ -133,6 +141,7 @@ Returns `boost::charconv::to_chars_result` with: [source,c++] ---- template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto from_chars(const char* first, const char* last, T& value, int base = 10) -> charconv::from_chars_result; diff --git 
a/doc/modules/ROOT/pages/cuda.adoc b/doc/modules/ROOT/pages/cuda.adoc index e772070..dc7ff15 100644 --- a/doc/modules/ROOT/pages/cuda.adoc +++ b/doc/modules/ROOT/pages/cuda.adoc @@ -92,5 +92,5 @@ Device error on thread 256 at /home/runner/work/safe_numbers/boost-root/libs/saf The `device_error_context` will also attempt to `printf` the error into the terminal. This works when compiling with verbose mode `-V`. -`printf` error messages will look the same as the message displayed by +`printf` error messages will look the same as the message displayed by the thrown exception. diff --git a/doc/modules/ROOT/pages/integer_utilities.adoc b/doc/modules/ROOT/pages/integer_utilities.adoc index 8dc4a0e..d01f31b 100644 --- a/doc/modules/ROOT/pages/integer_utilities.adoc +++ b/doc/modules/ROOT/pages/integer_utilities.adoc @@ -23,7 +23,7 @@ These operate on the non-bounded unsigned types (`u8`, `u16`, `u32`, `u64`, `u12 [source,c++] ---- template -[[nodiscard]] constexpr auto isqrt(const T val) -> T; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto isqrt(const T val) -> T; ---- Returns the integer square root of `val`, i.e., the largest integer `r` such that `r * r \<= val`. @@ -74,7 +74,7 @@ struct remove_trailing_zeros_return [source,c++] ---- template -[[nodiscard]] constexpr auto remove_trailing_zeros(const T n); +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto remove_trailing_zeros(const T n); ---- Removes all trailing decimal zeros from `n`. @@ -141,7 +141,7 @@ Tests whether an unsigned integer value is an exact power of 10 (i.e., one of 1, [source,c++] ---- template -[[nodiscard]] constexpr auto is_power_10(const T n) -> bool; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto is_power_10(const T n) -> bool; ---- === Parameters @@ -171,7 +171,7 @@ Returns the integer base-2 logarithm (floor of log~2~) of a value. 
[source,c++] ---- template -[[nodiscard]] constexpr auto ilog2(const T n) -> int; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto ilog2(const T n) -> int; ---- Computes `floor(log~2~(n))` using `bit_width(n) - 1`. @@ -213,7 +213,7 @@ Uses an O(1) algorithm based on the most significant bit position to approximate [source,c++] ---- template -[[nodiscard]] constexpr auto ilog10(const T n) -> int; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto ilog10(const T n) -> int; ---- Computes `floor(log~10~(n))` using `num_digits(n) - 1`, where `num_digits` approximates the digit count via `log~10~(x) ~= log~2~(x) / log~2~(10)` and refines with at most two comparisons against a power-of-10 lookup table. @@ -249,7 +249,7 @@ Returns the integer logarithm in an arbitrary base (floor of log~base~) of a val [source,c++] ---- template -[[nodiscard]] constexpr auto ilog(const T n, const T base) -> int; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto ilog(const T n, const T base) -> int; ---- Computes `floor(log~base~(n))` by repeated division. @@ -294,7 +294,7 @@ Integer exponentiation using the exponentiation-by-squaring algorithm. [source,c++] ---- template -[[nodiscard]] constexpr auto ipow(const T a, const T b) noexcept -> T; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto ipow(const T a, const T b) noexcept -> T; ---- Computes `a` raised to the power `b` using exponentiation by squaring. 
@@ -339,7 +339,7 @@ Tests whether an unsigned integer value is an exact power of 2 (i.e., has exactl [source,c++] ---- template -[[nodiscard]] constexpr auto is_power_2(const T n) noexcept -> bool; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto is_power_2(const T n) noexcept -> bool; ---- === Parameters @@ -370,7 +370,7 @@ For unsigned types, naive subtraction `a - b` when `b > a` would underflow; `abs [source,c++] ---- template -[[nodiscard]] constexpr auto abs_diff(const T a, const T b) noexcept -> T; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto abs_diff(const T a, const T b) noexcept -> T; ---- Returns `|a - b|`, computed as `a - b` if `a >= b`, or `b - a` otherwise. @@ -411,7 +411,7 @@ For unsigned types, this is equivalent to `(a + b - 1) / b` but computed without [source,c++] ---- template -[[nodiscard]] constexpr auto div_ceil(const T a, const T b) noexcept -> T; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto div_ceil(const T a, const T b) noexcept -> T; ---- Returns the ceiling of `a / b`. @@ -454,7 +454,7 @@ This is useful for alignment calculations (e.g., aligning a size to a page bound [source,c++] ---- template -[[nodiscard]] constexpr auto next_multiple_of(const T a, const T b) noexcept -> T; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto next_multiple_of(const T a, const T b) noexcept -> T; ---- Returns the smallest value `m` such that `m >= a` and `m % b == 0`. 
diff --git a/doc/modules/ROOT/pages/limits.adoc b/doc/modules/ROOT/pages/limits.adoc index fe9aa3a..f273a2b 100644 --- a/doc/modules/ROOT/pages/limits.adoc +++ b/doc/modules/ROOT/pages/limits.adoc @@ -79,15 +79,15 @@ struct numeric_limits static constexpr bool tinyness_before = std::numeric_limits::tinyness_before; // Static member functions - static constexpr T min() noexcept; - static constexpr T max() noexcept; - static constexpr T lowest() noexcept; - static constexpr T epsilon() noexcept; - static constexpr T round_error() noexcept; - static constexpr T infinity() noexcept; - static constexpr T quiet_NaN() noexcept; - static constexpr T signaling_NaN() noexcept; - static constexpr T denorm_min() noexcept; + BOOST_SAFE_NUMBERS_HOST_DEVICE static constexpr T min() noexcept; + BOOST_SAFE_NUMBERS_HOST_DEVICE static constexpr T max() noexcept; + BOOST_SAFE_NUMBERS_HOST_DEVICE static constexpr T lowest() noexcept; + BOOST_SAFE_NUMBERS_HOST_DEVICE static constexpr T epsilon() noexcept; + BOOST_SAFE_NUMBERS_HOST_DEVICE static constexpr T round_error() noexcept; + BOOST_SAFE_NUMBERS_HOST_DEVICE static constexpr T infinity() noexcept; + BOOST_SAFE_NUMBERS_HOST_DEVICE static constexpr T quiet_NaN() noexcept; + BOOST_SAFE_NUMBERS_HOST_DEVICE static constexpr T signaling_NaN() noexcept; + BOOST_SAFE_NUMBERS_HOST_DEVICE static constexpr T denorm_min() noexcept; }; } // namespace std @@ -138,63 +138,63 @@ For unsigned integer types, the following values are consistent across all speci [source,c++] ---- -static constexpr T min() noexcept; +BOOST_SAFE_NUMBERS_HOST_DEVICE static constexpr T min() noexcept; ---- Returns the minimum finite value (always `Tpass:[{0}]` for unsigned types). [source,c++] ---- -static constexpr T max() noexcept; +BOOST_SAFE_NUMBERS_HOST_DEVICE static constexpr T max() noexcept; ---- Returns the maximum finite value. 
[source,c++] ---- -static constexpr T lowest() noexcept; +BOOST_SAFE_NUMBERS_HOST_DEVICE static constexpr T lowest() noexcept; ---- Returns the lowest finite value (same as `min()` for unsigned types). [source,c++] ---- -static constexpr T epsilon() noexcept; +BOOST_SAFE_NUMBERS_HOST_DEVICE static constexpr T epsilon() noexcept; ---- Returns `Tpass:[{0}]` (not meaningful for integer types). [source,c++] ---- -static constexpr T round_error() noexcept; +BOOST_SAFE_NUMBERS_HOST_DEVICE static constexpr T round_error() noexcept; ---- Returns `Tpass:[{0}]` (not meaningful for integer types). [source,c++] ---- -static constexpr T infinity() noexcept; +BOOST_SAFE_NUMBERS_HOST_DEVICE static constexpr T infinity() noexcept; ---- Returns `Tpass:[{0}]` (unsigned integers cannot represent infinity). [source,c++] ---- -static constexpr T quiet_NaN() noexcept; +BOOST_SAFE_NUMBERS_HOST_DEVICE static constexpr T quiet_NaN() noexcept; ---- Returns `Tpass:[{0}]` (unsigned integers cannot represent NaN). [source,c++] ---- -static constexpr T signaling_NaN() noexcept; +BOOST_SAFE_NUMBERS_HOST_DEVICE static constexpr T signaling_NaN() noexcept; ---- Returns `Tpass:[{0}]` (unsigned integers cannot represent NaN). [source,c++] ---- -static constexpr T denorm_min() noexcept; +BOOST_SAFE_NUMBERS_HOST_DEVICE static constexpr T denorm_min() noexcept; ---- Returns `Tpass:[{0}]` (not meaningful for integer types). diff --git a/doc/modules/ROOT/pages/numeric.adoc b/doc/modules/ROOT/pages/numeric.adoc index f058ebd..6899858 100644 --- a/doc/modules/ROOT/pages/numeric.adoc +++ b/doc/modules/ROOT/pages/numeric.adoc @@ -25,7 +25,7 @@ Computes the greatest common divisor of two integers using the Euclidean algorit [source,c++] ---- template -[[nodiscard]] constexpr auto gcd(const T m, const T n) noexcept -> T; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto gcd(const T m, const T n) noexcept -> T; ---- Returns the greatest common divisor of `m` and `n`. 
@@ -65,7 +65,7 @@ Computes the least common multiple of two integers. [source,c++] ---- template -[[nodiscard]] constexpr auto lcm(const T m, const T n) noexcept -> T; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto lcm(const T m, const T n) noexcept -> T; ---- Returns the least common multiple of `m` and `n`. @@ -106,7 +106,7 @@ The result is rounded towards the first argument `a`. [source,c++] ---- template -[[nodiscard]] constexpr auto midpoint(const T a, const T b) noexcept -> T; +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto midpoint(const T a, const T b) noexcept -> T; ---- Returns the midpoint of `a` and `b`, computed without overflow. diff --git a/doc/modules/ROOT/pages/unsigned_integers.adoc b/doc/modules/ROOT/pages/unsigned_integers.adoc index b0f12bc..842252f 100644 --- a/doc/modules/ROOT/pages/unsigned_integers.adoc +++ b/doc/modules/ROOT/pages/unsigned_integers.adoc @@ -44,103 +44,120 @@ public: using basis_type = BasisType; // Construction - constexpr unsigned_integer_basis() noexcept = default; - explicit constexpr unsigned_integer_basis(BasisType val) noexcept; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr unsigned_integer_basis() noexcept = default; + BOOST_SAFE_NUMBERS_HOST_DEVICE explicit constexpr unsigned_integer_basis(BasisType val) noexcept; template requires std::is_same_v - explicit constexpr unsigned_integer_basis(T) noexcept = delete; // bool prohibited + BOOST_SAFE_NUMBERS_HOST_DEVICE explicit constexpr unsigned_integer_basis(T) noexcept = delete; // bool prohibited // Conversion to underlying types template - explicit constexpr operator OtherBasis() const; + BOOST_SAFE_NUMBERS_HOST_DEVICE explicit constexpr operator OtherBasis() const; // Comparison operators + BOOST_SAFE_NUMBERS_HOST_DEVICE friend constexpr auto operator<=>(unsigned_integer_basis lhs, unsigned_integer_basis rhs) noexcept -> std::strong_ordering = default; // Compound assignment operators (arithmetic) template + BOOST_SAFE_NUMBERS_HOST_DEVICE 
constexpr auto operator+=(unsigned_integer_basis rhs) -> unsigned_integer_basis&; template + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator-=(unsigned_integer_basis rhs) -> unsigned_integer_basis&; template + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator*=(unsigned_integer_basis rhs) -> unsigned_integer_basis&; template + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator/=(unsigned_integer_basis rhs) -> unsigned_integer_basis&; template + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator%=(unsigned_integer_basis rhs) -> unsigned_integer_basis&; // Compound assignment operators (bitwise) - constexpr auto operator&=(unsigned_integer_basis rhs) noexcept -> unsigned_integer_basis&; - constexpr auto operator|=(unsigned_integer_basis rhs) noexcept -> unsigned_integer_basis&; - constexpr auto operator^=(unsigned_integer_basis rhs) noexcept -> unsigned_integer_basis&; - constexpr auto operator<<=(unsigned_integer_basis rhs) -> unsigned_integer_basis&; - constexpr auto operator>>=(unsigned_integer_basis rhs) -> unsigned_integer_basis&; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator&=(unsigned_integer_basis rhs) noexcept -> unsigned_integer_basis&; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator|=(unsigned_integer_basis rhs) noexcept -> unsigned_integer_basis&; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator^=(unsigned_integer_basis rhs) noexcept -> unsigned_integer_basis&; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator<<=(unsigned_integer_basis rhs) -> unsigned_integer_basis&; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator>>=(unsigned_integer_basis rhs) -> unsigned_integer_basis&; // Increment and decrement operators - constexpr auto operator++() -> unsigned_integer_basis&; - constexpr auto operator++(int) -> unsigned_integer_basis; - constexpr auto operator--() -> unsigned_integer_basis&; - constexpr auto operator--(int) -> unsigned_integer_basis; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto 
operator++() -> unsigned_integer_basis&; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator++(int) -> unsigned_integer_basis; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator--() -> unsigned_integer_basis&; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator--(int) -> unsigned_integer_basis; // Unary operators - constexpr auto operator+() const noexcept -> unsigned_integer_basis; - constexpr auto operator-() const noexcept; // compile-time error + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator+() const noexcept -> unsigned_integer_basis; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator-() const noexcept; // compile-time error }; // class unsigned_integer_basis // Arithmetic operators (throw on overflow/underflow) template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator+(unsigned_integer_basis lhs, unsigned_integer_basis rhs) -> unsigned_integer_basis; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator-(unsigned_integer_basis lhs, unsigned_integer_basis rhs) -> unsigned_integer_basis; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator*(unsigned_integer_basis lhs, unsigned_integer_basis rhs) -> unsigned_integer_basis; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator/(unsigned_integer_basis lhs, unsigned_integer_basis rhs) -> unsigned_integer_basis; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator%(unsigned_integer_basis lhs, unsigned_integer_basis rhs) -> unsigned_integer_basis; // Bitwise operators template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator~(unsigned_integer_basis lhs) noexcept -> unsigned_integer_basis; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator&(unsigned_integer_basis lhs, unsigned_integer_basis rhs) noexcept -> unsigned_integer_basis; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator|(unsigned_integer_basis lhs, unsigned_integer_basis rhs) noexcept -> unsigned_integer_basis; template 
+BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator^(unsigned_integer_basis lhs, unsigned_integer_basis rhs) noexcept -> unsigned_integer_basis; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator<<(unsigned_integer_basis lhs, unsigned_integer_basis rhs) -> unsigned_integer_basis; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator>>(unsigned_integer_basis lhs, unsigned_integer_basis rhs) -> unsigned_integer_basis; @@ -269,7 +286,7 @@ constexpr auto shr(T lhs, T rhs); [source,c++] ---- -constexpr unsigned_integer_basis() noexcept = default; +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr unsigned_integer_basis() noexcept = default; ---- Values are default-initialized to zero. @@ -278,7 +295,7 @@ Values are default-initialized to zero. [source,c++] ---- -explicit constexpr unsigned_integer_basis(BasisType val) noexcept; +BOOST_SAFE_NUMBERS_HOST_DEVICE explicit constexpr unsigned_integer_basis(BasisType val) noexcept; ---- Construction from the underlying type is explicit to prevent accidental conversions. @@ -289,7 +306,7 @@ Construction from the underlying type is explicit to prevent accidental conversi ---- template requires std::is_same_v -explicit constexpr unsigned_integer_basis(T) noexcept = delete; +BOOST_SAFE_NUMBERS_HOST_DEVICE explicit constexpr unsigned_integer_basis(T) noexcept = delete; ---- Constructing from `bool` is a compile-time error. @@ -299,7 +316,7 @@ Constructing from `bool` is a compile-time error. [source,c++] ---- template -explicit constexpr operator OtherBasis() const; +BOOST_SAFE_NUMBERS_HOST_DEVICE explicit constexpr operator OtherBasis() const; ---- Conversion to other unsigned integral types is explicit. @@ -311,6 +328,7 @@ This allows safe narrowing when the value is known to fit at runtime. 
[source,c++] ---- +BOOST_SAFE_NUMBERS_HOST_DEVICE friend constexpr auto operator<=>(unsigned_integer_basis lhs, unsigned_integer_basis rhs) noexcept -> std::strong_ordering = default; ---- @@ -323,22 +341,27 @@ All comparison operators (`<`, `<=`, `>`, `>=`, `==`, `!=`) are available. [source,c++] ---- template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator+(unsigned_integer_basis lhs, unsigned_integer_basis rhs) -> unsigned_integer_basis; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator-(unsigned_integer_basis lhs, unsigned_integer_basis rhs) -> unsigned_integer_basis; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator*(unsigned_integer_basis lhs, unsigned_integer_basis rhs) -> unsigned_integer_basis; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator/(unsigned_integer_basis lhs, unsigned_integer_basis rhs) -> unsigned_integer_basis; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator%(unsigned_integer_basis lhs, unsigned_integer_basis rhs) -> unsigned_integer_basis; ---- @@ -356,18 +379,23 @@ All arithmetic operators perform runtime checks and throw exceptions when undefi [source,c++] ---- template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator+=(unsigned_integer_basis rhs) -> unsigned_integer_basis&; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator-=(unsigned_integer_basis rhs) -> unsigned_integer_basis&; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator*=(unsigned_integer_basis rhs) -> unsigned_integer_basis&; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator/=(unsigned_integer_basis rhs) -> unsigned_integer_basis&; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator%=(unsigned_integer_basis rhs) -> unsigned_integer_basis&; ---- @@ -378,30 +406,36 @@ Compound assignment operators follow the same exception behavior as their corres [source,c++] ---- template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto 
operator~(unsigned_integer_basis lhs) noexcept -> unsigned_integer_basis; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator&(unsigned_integer_basis lhs, unsigned_integer_basis rhs) noexcept -> unsigned_integer_basis; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator|(unsigned_integer_basis lhs, unsigned_integer_basis rhs) noexcept -> unsigned_integer_basis; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator^(unsigned_integer_basis lhs, unsigned_integer_basis rhs) noexcept -> unsigned_integer_basis; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator<<(unsigned_integer_basis lhs, unsigned_integer_basis rhs) -> unsigned_integer_basis; template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator>>(unsigned_integer_basis lhs, unsigned_integer_basis rhs) -> unsigned_integer_basis; @@ -473,11 +507,11 @@ All shift policy functions are `noexcept`. [source,c++] ---- -constexpr auto operator&=(unsigned_integer_basis rhs) noexcept -> unsigned_integer_basis&; -constexpr auto operator|=(unsigned_integer_basis rhs) noexcept -> unsigned_integer_basis&; -constexpr auto operator^=(unsigned_integer_basis rhs) noexcept -> unsigned_integer_basis&; -constexpr auto operator<<=(unsigned_integer_basis rhs) -> unsigned_integer_basis&; -constexpr auto operator>>=(unsigned_integer_basis rhs) -> unsigned_integer_basis&; +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator&=(unsigned_integer_basis rhs) noexcept -> unsigned_integer_basis&; +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator|=(unsigned_integer_basis rhs) noexcept -> unsigned_integer_basis&; +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator^=(unsigned_integer_basis rhs) noexcept -> unsigned_integer_basis&; +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator<<=(unsigned_integer_basis rhs) -> unsigned_integer_basis&; +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator>>=(unsigned_integer_basis rhs) -> unsigned_integer_basis&; ---- Compound bitwise 
assignment operators delegate to the corresponding free-function bitwise operators and follow the same exception behavior. @@ -488,10 +522,10 @@ Compound bitwise assignment operators delegate to the corresponding free-functio [source,c++] ---- -constexpr auto operator++() -> unsigned_integer_basis&; -constexpr auto operator++(int) -> unsigned_integer_basis; -constexpr auto operator--() -> unsigned_integer_basis&; -constexpr auto operator--(int) -> unsigned_integer_basis; +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator++() -> unsigned_integer_basis&; +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator++(int) -> unsigned_integer_basis; +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator--() -> unsigned_integer_basis&; +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator--(int) -> unsigned_integer_basis; ---- - `++` (pre/post): Throws `std::overflow_error` if the value is already at the maximum @@ -501,8 +535,8 @@ constexpr auto operator--(int) -> unsigned_integer_basis; [source,c++] ---- -constexpr auto operator+() const noexcept -> unsigned_integer_basis; -constexpr auto operator-() const noexcept; // compile-time error +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator+() const noexcept -> unsigned_integer_basis; +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator-() const noexcept; // compile-time error ---- - `+`: Returns a copy of the value (identity). This is consistent with built-in unsigned integer behavior. diff --git a/examples/cuda.cu b/examples/cuda.cu new file mode 100644 index 0000000..eb0a63c --- /dev/null +++ b/examples/cuda.cu @@ -0,0 +1,224 @@ +// Copyright 2026 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +// All safe_numbers types and free functions are annotated with __host__ __device__, +// so they work identically on both host and device. + +__global__ void arithmetic_kernel(const test_type* a, const test_type* b, test_type* out, int n) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < n) + { + // Basic arithmetic with overflow detection works on device + out[i] = a[i] + b[i]; + } +} + +__global__ void bit_kernel(const test_type* in, int* out, int n) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < n) + { + // All free functions work on device + out[i] = boost::safe_numbers::popcount(in[i]); + } +} + +__global__ void utility_kernel(const test_type* in, test_type* out, int n) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < n) + { + // Integer utilities work on device + out[i] = boost::safe_numbers::isqrt(in[i]); + } +} + +__global__ void numeric_kernel(const test_type* a, const test_type* b, test_type* out, int n) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < n) + { + // gcd, lcm, midpoint work on device + out[i] = boost::safe_numbers::gcd(a[i], b[i]); + } +} + +__global__ void charconv_kernel(const test_type* in, test_type* out, int n) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < n) + { + // charconv round-trip on device + char buf[16] {}; + auto tc = boost::charconv::to_chars(buf, buf + sizeof(buf), in[i]); + test_type parsed {}; + boost::charconv::from_chars(buf, tc.ptr, parsed); + out[i] = parsed; + } +} + +// Helper: allocate CUDA managed memory +void allocate(void** ptr, std::size_t bytes) +{ + cudaError_t err = cudaMallocManaged(ptr, bytes); + if (err != cudaSuccess) + { + throw std::runtime_error(cudaGetErrorString(err)); + } + 
cudaDeviceSynchronize(); +} + +template +void cleanup(T** ptr) +{ + if (*ptr != nullptr) + { + cudaFree(*ptr); + *ptr = nullptr; + } +} + +int main() +{ + const int n = 10000; + const int threadsPerBlock = 256; + const int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock; + + std::mt19937_64 rng {42}; + std::uniform_int_distribution dist {basis_type{1}, (std::numeric_limits::max)() / basis_type{2}}; + + // --- Allocate managed arrays --- + + test_type* a = nullptr; + test_type* b = nullptr; + test_type* out_tt = nullptr; + int* out_int = nullptr; + + allocate(reinterpret_cast(&a), n * sizeof(test_type)); + allocate(reinterpret_cast(&b), n * sizeof(test_type)); + allocate(reinterpret_cast(&out_tt), n * sizeof(test_type)); + allocate(reinterpret_cast(&out_int), n * sizeof(int)); + + for (int i = 0; i < n; ++i) + { + a[i] = test_type{dist(rng)}; + b[i] = test_type{dist(rng)}; + } + + // The device_error_context captures any overflow/underflow errors + // reported from device code and rethrows them on the host. + boost::safe_numbers::device_error_context ctx; + + // --- Test 1: Arithmetic (a + b, using half-range to avoid overflow) --- + + arithmetic_kernel<<>>(a, b, out_tt, n); + ctx.synchronize(); + + bool pass = true; + for (int i = 0; i < n; ++i) + { + if (out_tt[i] != a[i] + b[i]) + { + pass = false; + break; + } + } + std::cout << "Arithmetic (add): " << (pass ? "PASSED" : "FAILED") << '\n'; + + // --- Test 2: Bit functions (popcount) --- + + bit_kernel<<>>(a, out_int, n); + ctx.synchronize(); + + pass = true; + for (int i = 0; i < n; ++i) + { + if (out_int[i] != boost::safe_numbers::popcount(a[i])) + { + pass = false; + break; + } + } + std::cout << "Bit (popcount): " << (pass ? 
"PASSED" : "FAILED") << '\n'; + + // --- Test 3: Integer utilities (isqrt) --- + + utility_kernel<<>>(a, out_tt, n); + ctx.synchronize(); + + pass = true; + for (int i = 0; i < n; ++i) + { + if (out_tt[i] != boost::safe_numbers::isqrt(a[i])) + { + pass = false; + break; + } + } + std::cout << "Utility (isqrt): " << (pass ? "PASSED" : "FAILED") << '\n'; + + // --- Test 4: Numeric (gcd) --- + + numeric_kernel<<>>(a, b, out_tt, n); + ctx.synchronize(); + + pass = true; + for (int i = 0; i < n; ++i) + { + if (out_tt[i] != boost::safe_numbers::gcd(a[i], b[i])) + { + pass = false; + break; + } + } + std::cout << "Numeric (gcd): " << (pass ? "PASSED" : "FAILED") << '\n'; + + // --- Test 5: Charconv round-trip --- + + charconv_kernel<<>>(a, out_tt, n); + ctx.synchronize(); + + pass = true; + for (int i = 0; i < n; ++i) + { + if (out_tt[i] != a[i]) + { + pass = false; + break; + } + } + std::cout << "Charconv (rt): " << (pass ? "PASSED" : "FAILED") << '\n'; + + // --- Cleanup --- + + cleanup(&a); + cleanup(&b); + cleanup(&out_tt); + cleanup(&out_int); + cudaDeviceReset(); + + return 0; +} diff --git a/examples/cuda_error_handling.cu b/examples/cuda_error_handling.cu new file mode 100644 index 0000000..2e9efff --- /dev/null +++ b/examples/cuda_error_handling.cu @@ -0,0 +1,129 @@ +// Copyright 2026 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +// This example demonstrates how to catch arithmetic errors that occur +// on a CUDA device using device_error_context. When a safe_numbers +// operation overflows on the GPU, the error is captured in managed +// memory and rethrown with BOOST_THROW_EXCEPTION on the host when +// you call ctx.synchronize(). +// +// The device_error_context reports errors through a __managed__ error +// record. When an error is detected, synchronize() clears the +// error state and throws. 
After catching the exception, the same +// context can be reused immediately for new kernel launches. + +#include +#include +#include +#include + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +// This kernel deliberately overflows: it adds 1 to the maximum u32 value +__global__ void overflow_kernel(test_type* out) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i == 0) + { + const test_type max_val {(std::numeric_limits::max)()}; + out[0] = max_val + test_type{1}; // Overflow! + } +} + +// This kernel performs valid arithmetic +__global__ void safe_kernel(const test_type* in, test_type* out, int n) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < n) + { + out[i] = in[i] + test_type{1}; + } +} + +int main() +{ + // Create a single device_error_context for the lifetime of the program. + // Only one context may exist at a time; the constructor claims that + // slot and clears any stale error state. + boost::safe_numbers::device_error_context ctx; + + // --------------------------------------------------------------- + // Step 1: Launch a kernel that overflows and catch the error + // --------------------------------------------------------------- + + test_type* result = nullptr; + cudaMallocManaged(&result, sizeof(test_type)); + cudaDeviceSynchronize(); + + std::cout << "=== Launching kernel that overflows ===" << std::endl; + + overflow_kernel<<<1, 1>>>(result); + + // synchronize() waits for the kernel, reads the error state, + // and throws the appropriate std::exception if one was captured. + // On error it clears the error state before throwing, so the + // context is immediately reusable after catching the exception. 
+ try + { + ctx.synchronize(); + std::cout << "No error detected (unexpected)" << std::endl; + } + catch (const std::overflow_error& e) + { + std::cout << "Caught overflow_error: " << e.what() << std::endl; + } + + // --------------------------------------------------------------- + // Step 2: After catching the error, the same ctx can be reused + // immediately. synchronize() already cleared the error + // state before throwing, so no recovery step is needed. + // --------------------------------------------------------------- + + std::cout << "\n=== Launching kernel with valid arithmetic ===" << std::endl; + + test_type* data = nullptr; + test_type* out = nullptr; + + cudaMallocManaged(&data, 4 * sizeof(test_type)); + cudaMallocManaged(&out, 4 * sizeof(test_type)); + cudaDeviceSynchronize(); + + data[0] = test_type{10}; + data[1] = test_type{20}; + data[2] = test_type{30}; + data[3] = test_type{40}; + + safe_kernel<<<1, 4>>>(data, out, 4); + + try + { + ctx.synchronize(); + std::cout << "No error detected (expected)" << std::endl; + } + catch (const std::exception& e) + { + std::cout << "Unexpected error: " << e.what() << std::endl; + } + + for (int i = 0; i < 4; ++i) + { + std::cout << "result[" << i << "] = " + << static_cast(out[i]) << std::endl; + } + + // --------------------------------------------------------------- + // Cleanup + // --------------------------------------------------------------- + + cudaFree(result); + cudaFree(data); + cudaFree(out); + + return 0; +} diff --git a/include/boost/safe_numbers/bit.hpp b/include/boost/safe_numbers/bit.hpp index 77b7843..35e6c33 100644 --- a/include/boost/safe_numbers/bit.hpp +++ b/include/boost/safe_numbers/bit.hpp @@ -1,3 +1,4 @@ +// Copyright 2020 Peter Dimov // Copyright 2026 Matt Borland // Distributed under the Boost Software License, Version 1.0. 
// https://www.boost.org/LICENSE_1_0.txt @@ -11,119 +12,341 @@ #ifndef BOOST_SAFE_NUMBERS_BUILD_MODULE -#include #include +#if (defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) + +#include + +#else + +#include +#include + +#endif + #endif // BOOST_SAFE_NUMBERS_BUILD_MODULE namespace boost::safe_numbers { BOOST_SAFE_NUMBERS_EXPORT template -[[nodiscard]] constexpr auto has_single_bit(const UnsignedInt x) noexcept -> bool +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto has_single_bit(const UnsignedInt x) noexcept -> bool { - using boost::core::has_single_bit; using underlying_type = detail::underlying_type_t; + #if !(defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) + + using boost::core::has_single_bit; return has_single_bit(static_cast(x)); + + #else + + if constexpr (std::is_same_v) + { + return boost::int128::has_single_bit(static_cast(x)); + } + else + { + return cuda::std::has_single_bit(static_cast(x)); + } + + #endif } BOOST_SAFE_NUMBERS_EXPORT template -[[nodiscard]] constexpr auto bit_ceil(const UnsignedInt x) noexcept -> UnsignedInt +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto bit_ceil(const UnsignedInt x) noexcept -> UnsignedInt { - using boost::core::bit_ceil; using underlying_type = detail::underlying_type_t; + #if !(defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) + + using boost::core::bit_ceil; return UnsignedInt{bit_ceil(static_cast(x))}; + + #else + + if constexpr (std::is_same_v) + { + return UnsignedInt{boost::int128::bit_ceil(static_cast(x))}; + } + else + { + return UnsignedInt{cuda::std::bit_ceil(static_cast(x))}; + } + + #endif } BOOST_SAFE_NUMBERS_EXPORT template -[[nodiscard]] constexpr auto bit_floor(const UnsignedInt x) noexcept -> UnsignedInt +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto bit_floor(const UnsignedInt x) noexcept -> UnsignedInt { - using boost::core::bit_floor; using underlying_type = detail::underlying_type_t; + #if 
!(defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) + + using boost::core::bit_floor; return UnsignedInt{bit_floor(static_cast(x))}; + + #else + + if constexpr (std::is_same_v) + { + return UnsignedInt{boost::int128::bit_floor(static_cast(x))}; + } + else + { + return UnsignedInt{cuda::std::bit_floor(static_cast(x))}; + } + + #endif } BOOST_SAFE_NUMBERS_EXPORT template -[[nodiscard]] constexpr auto bit_width(const UnsignedInt x) noexcept -> int +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto bit_width(const UnsignedInt x) noexcept -> int { - using boost::core::bit_width; using underlying_type = detail::underlying_type_t; + #if !(defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) + + using boost::core::bit_width; return static_cast(bit_width(static_cast(x))); + + #else + + if constexpr (std::is_same_v) + { + return static_cast(boost::int128::bit_width(static_cast(x))); + } + else + { + return static_cast(cuda::std::bit_width(static_cast(x))); + } + + #endif } BOOST_SAFE_NUMBERS_EXPORT template -[[nodiscard]] constexpr auto rotl(const UnsignedInt x, const int s) noexcept -> UnsignedInt +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto rotl(const UnsignedInt x, const int s) noexcept -> UnsignedInt { - using boost::core::rotl; using underlying_type = detail::underlying_type_t; + #if !(defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) + + using boost::core::rotl; return UnsignedInt{rotl(static_cast(x), s)}; + + #else + + if constexpr (std::is_same_v) + { + return UnsignedInt{boost::int128::rotl(static_cast(x), s)}; + } + else + { + return UnsignedInt{cuda::std::rotl(static_cast(x), s)}; + } + + #endif } BOOST_SAFE_NUMBERS_EXPORT template -[[nodiscard]] constexpr auto rotr(const UnsignedInt x, const int s) noexcept -> UnsignedInt +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto rotr(const UnsignedInt x, const int s) noexcept -> UnsignedInt { - using boost::core::rotr; using underlying_type = 
detail::underlying_type_t; + #if !(defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) + + using boost::core::rotr; return UnsignedInt{rotr(static_cast(x), s)}; + + #else + + if constexpr (std::is_same_v) + { + return UnsignedInt{boost::int128::rotr(static_cast(x), s)}; + } + else + { + return UnsignedInt{cuda::std::rotr(static_cast(x), s)}; + } + + #endif } BOOST_SAFE_NUMBERS_EXPORT template -[[nodiscard]] constexpr auto countl_zero(const UnsignedInt x) noexcept -> int +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto countl_zero(const UnsignedInt x) noexcept -> int { - using boost::core::countl_zero; using underlying_type = detail::underlying_type_t; + #if !(defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) + + using boost::core::countl_zero; return countl_zero(static_cast(x)); + + #else + + if constexpr (std::is_same_v) + { + return boost::int128::countl_zero(static_cast(x)); + } + else + { + return cuda::std::countl_zero(static_cast(x)); + } + + #endif } BOOST_SAFE_NUMBERS_EXPORT template -[[nodiscard]] constexpr auto countl_one(const UnsignedInt x) noexcept -> int +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto countl_one(const UnsignedInt x) noexcept -> int { - using boost::core::countl_one; using underlying_type = detail::underlying_type_t; + #if !(defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) + + using boost::core::countl_one; return countl_one(static_cast(x)); + + #else + + if constexpr (std::is_same_v) + { + return boost::int128::countl_one(static_cast(x)); + } + else + { + return cuda::std::countl_one(static_cast(x)); + } + + #endif } BOOST_SAFE_NUMBERS_EXPORT template -[[nodiscard]] constexpr auto countr_zero(const UnsignedInt x) noexcept -> int +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto countr_zero(const UnsignedInt x) noexcept -> int { - using boost::core::countr_zero; using underlying_type = detail::underlying_type_t; + #if !(defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) 
&& defined(__CUDACC__)) + + using boost::core::countr_zero; return countr_zero(static_cast(x)); + + #else + + if constexpr (std::is_same_v) + { + return boost::int128::countr_zero(static_cast(x)); + } + else + { + return cuda::std::countr_zero(static_cast(x)); + } + + #endif } BOOST_SAFE_NUMBERS_EXPORT template -[[nodiscard]] constexpr auto countr_one(const UnsignedInt x) noexcept -> int +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto countr_one(const UnsignedInt x) noexcept -> int { - using boost::core::countr_one; using underlying_type = detail::underlying_type_t; + #if !(defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) + + using boost::core::countr_one; return countr_one(static_cast(x)); + + #else + + if constexpr (std::is_same_v) + { + return boost::int128::countr_one(static_cast(x)); + } + else + { + return cuda::std::countr_one(static_cast(x)); + } + + #endif } BOOST_SAFE_NUMBERS_EXPORT template -[[nodiscard]] constexpr auto popcount(const UnsignedInt x) noexcept -> int +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto popcount(const UnsignedInt x) noexcept -> int { - using boost::core::popcount; using underlying_type = detail::underlying_type_t; + #if !(defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) + + using boost::core::popcount; return popcount(static_cast(x)); + + #else + + if constexpr (std::is_same_v) + { + return boost::int128::popcount(static_cast(x)); + } + else + { + return cuda::std::popcount(static_cast(x)); + } + + #endif +} + +// NVCC 12 does not have byteswap builtin, only 13+ +#if (defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) + +namespace detail { + +constexpr auto byteswap_impl(const std::uint8_t x) noexcept +{ + return x; +} + +constexpr auto byteswap_impl(const std::uint16_t x) noexcept +{ + return static_cast( x << 8 | x >> 8 ); +} + +constexpr auto byteswap_impl(const std::uint32_t x) noexcept +{ + const auto step16 = x << 16 | x >> 16; + return ((step16 << 8) & 
0xff00ff00) | ((step16 >> 8) & 0x00ff00ff); +} + +constexpr auto byteswap_impl(const std::uint64_t x) noexcept +{ + const auto step32 = x << 32 | x >> 32; + const auto step16 = (step32 & 0x0000FFFF0000FFFFULL) << 16 | (step32 & 0xFFFF0000FFFF0000ULL) >> 16; + return (step16 & 0x00FF00FF00FF00FFULL) << 8 | (step16 & 0xFF00FF00FF00FF00ULL) >> 8; } +} // namespace detail + +#endif + BOOST_SAFE_NUMBERS_EXPORT template -[[nodiscard]] constexpr auto byteswap(const Int x) noexcept -> Int +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto byteswap(const Int x) noexcept -> Int { - using boost::core::byteswap; using underlying_type = detail::underlying_type_t; + #if !(defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) + + using boost::core::byteswap; return Int{byteswap(static_cast(x))}; + + #else + + if constexpr (std::is_same_v) + { + return Int{boost::int128::byteswap(static_cast(x))}; + } + else + { + return Int{detail::byteswap_impl(static_cast(x))}; + } + + #endif } namespace detail { @@ -149,11 +372,21 @@ consteval auto make_byte_reverse_table() -> std::array return table; } +#if !(defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) + inline constexpr auto reverse_table {make_byte_reverse_table()}; +#endif + template -[[nodiscard]] constexpr auto bitswap_impl(UnsignedInt x) noexcept -> UnsignedInt +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto bitswap_impl(UnsignedInt x) noexcept -> UnsignedInt { + #if (defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) + + constexpr auto reverse_table {make_byte_reverse_table()}; + + #endif + if constexpr (sizeof(UnsignedInt) == 1) { return static_cast(reverse_table[static_cast(x)]); @@ -177,7 +410,7 @@ template } // namespace detail BOOST_SAFE_NUMBERS_EXPORT template -[[nodiscard]] constexpr auto bitswap(Int x) noexcept -> Int +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto bitswap(Int x) noexcept -> Int { using underlying_type = detail::underlying_type_t; 
return static_cast(detail::bitswap_impl(static_cast(x))); diff --git a/include/boost/safe_numbers/byte_conversions.hpp b/include/boost/safe_numbers/byte_conversions.hpp index cba4b15..f74fa96 100644 --- a/include/boost/safe_numbers/byte_conversions.hpp +++ b/include/boost/safe_numbers/byte_conversions.hpp @@ -21,7 +21,7 @@ namespace boost::safe_numbers { template -[[nodiscard]] constexpr auto to_be(const T value) noexcept -> T +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto to_be(const T value) noexcept -> T { if constexpr (std::endian::native == std::endian::big) { @@ -34,14 +34,14 @@ template } template -[[nodiscard]] constexpr auto from_be(const T value) noexcept -> T +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto from_be(const T value) noexcept -> T { // Self-inverse return to_be(value); } template -[[nodiscard]] constexpr auto to_le(const T value) noexcept -> T +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto to_le(const T value) noexcept -> T { if constexpr (std::endian::native == std::endian::little) { @@ -54,21 +54,21 @@ template } template -[[nodiscard]] constexpr auto from_le(const T value) noexcept -> T +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto from_le(const T value) noexcept -> T { // Self-inverse return to_le(value); } template -[[nodiscard]] constexpr auto to_be_bytes(const T value) noexcept -> std::array +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto to_be_bytes(const T value) noexcept -> std::array { const auto be_value {to_be(value)}; return std::bit_cast>(be_value); } template -[[nodiscard]] constexpr auto from_be_bytes(const std::span bytes) -> T +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto from_be_bytes(const std::span bytes) -> T { using underlying_type = detail::underlying_type_t; @@ -103,14 +103,14 @@ template } template -[[nodiscard]] constexpr auto to_le_bytes(const T value) noexcept -> std::array +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr 
auto to_le_bytes(const T value) noexcept -> std::array { const auto le_value {to_le(value)}; return std::bit_cast>(le_value); } template -[[nodiscard]] constexpr auto from_le_bytes(const std::span bytes) -> T +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto from_le_bytes(const std::span bytes) -> T { using underlying_type = detail::underlying_type_t; @@ -145,7 +145,7 @@ template } template -[[nodiscard]] constexpr auto to_ne_bytes(const T value) noexcept -> std::array +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto to_ne_bytes(const T value) noexcept -> std::array { if constexpr (std::endian::native == std::endian::little) { @@ -158,7 +158,7 @@ template } template -[[nodiscard]] constexpr auto from_ne_bytes(const std::span bytes) -> T +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto from_ne_bytes(const std::span bytes) -> T { if constexpr (std::endian::native == std::endian::little) { diff --git a/include/boost/safe_numbers/charconv.hpp b/include/boost/safe_numbers/charconv.hpp index 9b43c39..bd8c4e5 100644 --- a/include/boost/safe_numbers/charconv.hpp +++ b/include/boost/safe_numbers/charconv.hpp @@ -18,7 +18,7 @@ namespace boost::charconv { template -constexpr auto from_chars(const char* first, const char* last, T& value, int base = 10) +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto from_chars(const char* first, const char* last, T& value, int base = 10) -> charconv::from_chars_result { using underlying_type = safe_numbers::detail::underlying_type_t; @@ -31,7 +31,7 @@ constexpr auto from_chars(const char* first, const char* last, T& value, int bas } template -constexpr auto to_chars(char* first, char* last, const T value, int base = 10) +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto to_chars(char* first, char* last, const T value, int base = 10) -> charconv::to_chars_result { using underlying_type = safe_numbers::detail::underlying_type_t; diff --git a/include/boost/safe_numbers/detail/cuda_error_reporting.hpp 
b/include/boost/safe_numbers/detail/cuda_error_reporting.hpp index c7e144e..669fe36 100644 --- a/include/boost/safe_numbers/detail/cuda_error_reporting.hpp +++ b/include/boost/safe_numbers/detail/cuda_error_reporting.hpp @@ -87,9 +87,13 @@ BOOST_SAFE_NUMBERS_HOST_DEVICE inline void copy_to_buf(char* dst, const char* sr #ifdef __CUDACC__ -// __managed__ places this in unified memory so the host can read it directly -// without cudaMemcpyFromSymbol, which fails after __trap() corrupts the device context -__managed__ cuda_device_error g_device_error = {0, 0, 0, exception_type::unknown, {'\0'}, {'\0'}}; +// Managed memory error struct accessible from both host and device. +// Since we never destroy the CUDA context, __managed__ is safe to use. +__managed__ cuda_device_error g_device_error {}; + +// Tracks whether a device_error_context instance is alive. +// Only one may exist at a time to prevent races on g_device_error. +inline bool g_device_error_context_active = false; __host__ __device__ inline void report_device_error( exception_type exc, @@ -108,19 +112,13 @@ __host__ __device__ inline void report_device_error( copy_to_buf(g_device_error.file, file, BOOST_SAFE_NUMBERS_DEVICE_ERROR_BUFFER_SIZE); copy_to_buf(g_device_error.expression, expression, BOOST_SAFE_NUMBERS_DEVICE_ERROR_BUFFER_SIZE); __threadfence_system(); - - printf("Device error on thread %d at %s:%d: %s\n", - blockIdx.x * blockDim.x + threadIdx.x, - file, line, expression); - - __trap(); } - // Other threads: spin until the trap terminates the kernel - while (true) - { - __nanosleep(1000000); - } + // Return instead of calling __trap(). This allows the kernel to + // complete normally without corrupting the CUDA context. Other + // threads may continue with incorrect values, but synchronize() + // will detect the error via the flag and throw on the host. 
+ return; #else const auto msg = std::string(file) + ":" + std::to_string(line) + ": " + expression; @@ -154,8 +152,27 @@ class device_error_context { public: - // Clears the global state - // The error context can be reused with multiple kernels if this is called + // Clears the error state. Only one device_error_context may exist at a time. + device_error_context() + { + if (detail::g_device_error_context_active) + { + BOOST_THROW_EXCEPTION(std::logic_error( + "Only one device_error_context may exist at a time")); + } + detail::g_device_error_context_active = true; + reset(); + } + + ~device_error_context() + { + detail::g_device_error_context_active = false; + } + + device_error_context(const device_error_context&) = delete; + device_error_context& operator=(const device_error_context&) = delete; + + // Clears the error fields so the context can be reused across kernel launches. void reset() { detail::g_device_error.flag = 0; @@ -166,60 +183,54 @@ class device_error_context detail::g_device_error.expression[0] = '\0'; } - // On construction, reset the global error state to ensure we have a good start - device_error_context() - { - reset(); - } - - // Allows the user to synchronize and check for errors as is typical of CUDA - // This allows an extra step in that it will throw on the host - // Much like cudaGetLastError, the call to synchronize will destroy the information in the global context - // This allows trivial reuse of all these facilities + // Synchronizes the device and checks for errors captured by device code. + // If an error was detected, the error state is cleared (so the context + // is immediately reusable), and the appropriate std::exception is thrown. 
void synchronize() { const auto status = cudaDeviceSynchronize(); - // Read directly from managed memory — no cudaMemcpyFromSymbol needed - // This works even after __trap() corrupts the device context const auto flag = detail::g_device_error.flag; - const auto thread_id = detail::g_device_error.thread_id; - const auto line = detail::g_device_error.line; if (flag != 0) { + const auto thread_id = detail::g_device_error.thread_id; + const auto line = detail::g_device_error.line; + const auto exc = detail::g_device_error.exception; + std::ostringstream oss; oss << "Device error on thread " << thread_id << " at " << detail::g_device_error.file << ":" << line << ": " << detail::g_device_error.expression; - // Read exception type before reset clears it - const auto exc = detail::g_device_error.exception; + const auto msg = oss.str(); - // Clear the sticky CUDA error and reset our state - cudaGetLastError(); + // Clear the error state so the context can be reused + // immediately after catching the exception. 
reset(); switch (exc) { case detail::exception_type::domain_error: - BOOST_THROW_EXCEPTION(std::domain_error(oss.str())); + BOOST_THROW_EXCEPTION(std::domain_error(msg)); break; case detail::exception_type::overflow: - BOOST_THROW_EXCEPTION(std::overflow_error(oss.str())); + BOOST_THROW_EXCEPTION(std::overflow_error(msg)); break; case detail::exception_type::underflow: - BOOST_THROW_EXCEPTION(std::underflow_error(oss.str())); + BOOST_THROW_EXCEPTION(std::underflow_error(msg)); break; case detail::exception_type::unknown: [[fallthrough]]; default: - BOOST_THROW_EXCEPTION(std::runtime_error(oss.str())); + BOOST_THROW_EXCEPTION(std::runtime_error(msg)); } } - - reset(); + else + { + reset(); + } if (status != cudaSuccess) { diff --git a/include/boost/safe_numbers/detail/num_digits.hpp b/include/boost/safe_numbers/detail/num_digits.hpp index c35831d..6c6caae 100644 --- a/include/boost/safe_numbers/detail/num_digits.hpp +++ b/include/boost/safe_numbers/detail/num_digits.hpp @@ -13,11 +13,20 @@ #ifndef BOOST_SAFE_NUMBERS_BUILD_MODULE -#include #include #include #include +#if (defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) + +#include + +#else + +#include + +#endif + #endif namespace boost::safe_numbers::detail { @@ -41,10 +50,14 @@ consteval auto make_powers_of_10() noexcept return table; } +#if !(defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) + inline constexpr auto powers_of_10_u32 {make_powers_of_10()}; inline constexpr auto powers_of_10_u64 {make_powers_of_10()}; inline constexpr auto powers_of_10_u128 {make_powers_of_10()}; +#endif + // ============================================================================ // num_digits: counts the number of decimal digits using MSB approximation // @@ -58,6 +71,12 @@ template requires (std::numeric_limits::digits <= 32 && std::is_unsigned_v) constexpr auto num_digits(const T init_x) noexcept -> int { + #if (defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) + + constexpr auto 
powers_of_10_u32 {make_powers_of_10()}; + + #endif + const auto x {static_cast(init_x)}; if (x == 0) @@ -65,7 +84,11 @@ constexpr auto num_digits(const T init_x) noexcept -> int return 1; } + #if !(defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) const auto msb {32 - boost::core::countl_zero(x)}; + #else + const auto msb {32 - cuda::std::countl_zero(x)}; + #endif // Approximate log10 const auto estimated_digits {(msb * 1000) / 3322 + 1}; @@ -86,12 +109,22 @@ constexpr auto num_digits(const T init_x) noexcept -> int // Overload for uint64_t constexpr auto num_digits(const std::uint64_t x) noexcept -> int { + #if (defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) + + constexpr auto powers_of_10_u64 {make_powers_of_10()}; + + #endif + if (x <= UINT32_MAX) { return num_digits(static_cast(x)); } + #if !(defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) const auto msb {64 - boost::core::countl_zero(x)}; + #else + const auto msb {64 - cuda::std::countl_zero(x)}; + #endif // Approximate log10 const auto estimated_digits {(msb * 1000) / 3322 + 1}; @@ -112,6 +145,12 @@ constexpr auto num_digits(const std::uint64_t x) noexcept -> int // Overload for uint128_t constexpr auto num_digits(const boost::int128::uint128_t& x) noexcept -> int { + #if (defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) + + constexpr auto powers_of_10_u128 {make_powers_of_10()}; + + #endif + if (x.high == UINT64_C(0)) { return num_digits(x.low); diff --git a/include/boost/safe_numbers/detail/rtz.hpp b/include/boost/safe_numbers/detail/rtz.hpp index d2018ee..2d12c98 100644 --- a/include/boost/safe_numbers/detail/rtz.hpp +++ b/include/boost/safe_numbers/detail/rtz.hpp @@ -21,7 +21,7 @@ namespace boost::safe_numbers::detail { // n is assumed to be at most of bit_width bits template -constexpr auto rotr(UInt n, unsigned int r) noexcept -> UInt +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto rotr(UInt n, unsigned int r) noexcept -> UInt { 
static_assert(bit_width >= std::numeric_limits::digits); @@ -38,7 +38,7 @@ struct remove_trailing_zeros_return std::size_t number_of_removed_zeros; }; -constexpr auto remove_trailing_zeros(std::uint8_t n) noexcept -> remove_trailing_zeros_return +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto remove_trailing_zeros(std::uint8_t n) noexcept -> remove_trailing_zeros_return { std::size_t s {}; @@ -55,7 +55,7 @@ constexpr auto remove_trailing_zeros(std::uint8_t n) noexcept -> remove_trailing return {n, s}; } -constexpr auto remove_trailing_zeros(std::uint16_t n) noexcept -> remove_trailing_zeros_return +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto remove_trailing_zeros(std::uint16_t n) noexcept -> remove_trailing_zeros_return { std::size_t s {}; @@ -77,7 +77,7 @@ constexpr auto remove_trailing_zeros(std::uint16_t n) noexcept -> remove_trailin return {n, s}; } -constexpr auto remove_trailing_zeros(std::uint32_t n) noexcept -> remove_trailing_zeros_return +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto remove_trailing_zeros(std::uint32_t n) noexcept -> remove_trailing_zeros_return { std::size_t s {}; @@ -104,7 +104,7 @@ constexpr auto remove_trailing_zeros(std::uint32_t n) noexcept -> remove_trailin return {n, s}; } -constexpr auto remove_trailing_zeros(std::uint64_t n) noexcept -> remove_trailing_zeros_return +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto remove_trailing_zeros(std::uint64_t n) noexcept -> remove_trailing_zeros_return { std::size_t s {}; @@ -136,7 +136,7 @@ constexpr auto remove_trailing_zeros(std::uint64_t n) noexcept -> remove_trailin return {n, s}; } -constexpr auto remove_trailing_zeros(int128::uint128_t n) noexcept -> remove_trailing_zeros_return +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto remove_trailing_zeros(int128::uint128_t n) noexcept -> remove_trailing_zeros_return { std::size_t s {}; diff --git a/include/boost/safe_numbers/integer_utilities.hpp b/include/boost/safe_numbers/integer_utilities.hpp index 991b7a9..66d207b 100644 --- 
a/include/boost/safe_numbers/integer_utilities.hpp +++ b/include/boost/safe_numbers/integer_utilities.hpp @@ -15,7 +15,7 @@ namespace boost::safe_numbers { // Newton's method as it can't possibly overflow, and converges rapidly template -[[nodiscard]] constexpr auto isqrt(const T val) -> T +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto isqrt(const T val) -> T { using underlying_type = detail::underlying_type_t; @@ -42,7 +42,7 @@ template } template -[[nodiscard]] constexpr auto remove_trailing_zeros(const T n) +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto remove_trailing_zeros(const T n) { using underlying_type = detail::underlying_type_t; @@ -55,7 +55,7 @@ template } template -[[nodiscard]] constexpr auto is_power_10(const T n) -> bool +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto is_power_10(const T n) -> bool { using underlying_type = detail::underlying_type_t; @@ -64,14 +64,14 @@ template } template -[[nodiscard]] constexpr auto is_power_2(const T n) noexcept -> bool +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto is_power_2(const T n) noexcept -> bool { return has_single_bit(n); } // Integer log base 2: floor(log2(n)) == bit_width(n) - 1 template -[[nodiscard]] constexpr auto ilog2(const T n) -> int +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto ilog2(const T n) -> int { using underlying_type = detail::underlying_type_t; @@ -86,7 +86,7 @@ template // Integer log base 10: floor(ilog10(n)) == num_digits(n) - 1 // Uses MSB-based approximation with power-of-10 table lookup (O(1)) template -[[nodiscard]] constexpr auto ilog10(const T n) -> int +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto ilog10(const T n) -> int { using underlying_type = detail::underlying_type_t; @@ -101,7 +101,7 @@ template // Integer log arbitrary base: floor(log_base(n)) // Repeated division: O(log_base(n)) divisions template -[[nodiscard]] constexpr auto ilog(const T n, const T base) -> int 
+BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto ilog(const T n, const T base) -> int { using underlying_type = detail::underlying_type_t; @@ -132,7 +132,7 @@ namespace detail { // Iterative exponentiation by squaring: O(log b) multiplications template -[[nodiscard]] constexpr auto ipow_impl(T base, T exp) -> T +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto ipow_impl(T base, T exp) -> T { using underlying_type = underlying_type_t; @@ -157,19 +157,19 @@ template } // namespace detail template -[[nodiscard]] constexpr auto ipow(const T a, const T b) -> T +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto ipow(const T a, const T b) -> T { return detail::ipow_impl(a, b); } template -[[nodiscard]] constexpr auto abs_diff(const T a, const T b) noexcept -> T +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto abs_diff(const T a, const T b) noexcept -> T { return a > b ? a - b : b - a; } template -[[nodiscard]] constexpr auto div_ceil(const T a, const T b) noexcept -> T +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto div_ceil(const T a, const T b) noexcept -> T { using underlying_type = detail::underlying_type_t; @@ -187,7 +187,7 @@ template } template -[[nodiscard]] constexpr auto next_multiple_of(const T a, const T b) noexcept -> T +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto next_multiple_of(const T a, const T b) noexcept -> T { return div_ceil(a, b) * b; } diff --git a/include/boost/safe_numbers/numeric.hpp b/include/boost/safe_numbers/numeric.hpp index dd5e1a0..cfb95d9 100644 --- a/include/boost/safe_numbers/numeric.hpp +++ b/include/boost/safe_numbers/numeric.hpp @@ -10,14 +10,18 @@ #ifndef BOOST_SAFE_NUMBERS_BUILD_MODULE +#if (defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) +#include +#else #include +#endif #endif namespace boost::safe_numbers { template -[[nodiscard]] constexpr auto gcd(const T m, const T n) noexcept -> T +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr 
auto gcd(const T m, const T n) noexcept -> T { using underlying_type = detail::underlying_type_t; @@ -27,12 +31,16 @@ template } else { + #if !(defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) return T{static_cast(std::gcd(static_cast(m), static_cast(n)))}; + #else + return T{static_cast(cuda::std::gcd(static_cast(m), static_cast(n)))}; + #endif } } template -[[nodiscard]] constexpr auto lcm(const T m, const T n) noexcept -> T +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto lcm(const T m, const T n) noexcept -> T { using underlying_type = detail::underlying_type_t; @@ -42,12 +50,16 @@ template } else { + #if !(defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) return T{static_cast(std::lcm(static_cast(m), static_cast(n)))}; + #else + return T{static_cast(cuda::std::lcm(static_cast(m), static_cast(n)))}; + #endif } } template -[[nodiscard]] constexpr auto midpoint(const T a, const T b) noexcept -> T +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto midpoint(const T a, const T b) noexcept -> T { using underlying_type = detail::underlying_type_t; @@ -57,7 +69,11 @@ template } else { + #if !(defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA) && defined(__CUDACC__)) return T{static_cast(std::midpoint(static_cast(a), static_cast(b)))}; + #else + return T{static_cast(cuda::std::midpoint(static_cast(a), static_cast(b)))}; + #endif } } diff --git a/test/cuda_jamfile b/test/cuda_jamfile index d00564a..667f088 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -80,3 +80,232 @@ run test_cuda_u128_sub_error.cu ; run test_cuda_u128_mul_error.cu ; run test_cuda_u128_div_error.cu ; run test_cuda_u128_mod_error.cu ; + +# Bit function tests + +# u8 bit tests +run test_cuda_u8_has_single_bit.cu ; +run test_cuda_u8_bit_ceil.cu ; +run test_cuda_u8_bit_floor.cu ; +run test_cuda_u8_bit_width.cu ; +run test_cuda_u8_rotl.cu ; +run test_cuda_u8_rotr.cu ; +run test_cuda_u8_countl_zero.cu ; +run test_cuda_u8_countl_one.cu ; +run 
test_cuda_u8_countr_zero.cu ; +run test_cuda_u8_countr_one.cu ; +run test_cuda_u8_popcount.cu ; +run test_cuda_u8_byteswap.cu ; +run test_cuda_u8_bitswap.cu ; + +# u16 bit tests +run test_cuda_u16_has_single_bit.cu ; +run test_cuda_u16_bit_ceil.cu ; +run test_cuda_u16_bit_floor.cu ; +run test_cuda_u16_bit_width.cu ; +run test_cuda_u16_rotl.cu ; +run test_cuda_u16_rotr.cu ; +run test_cuda_u16_countl_zero.cu ; +run test_cuda_u16_countl_one.cu ; +run test_cuda_u16_countr_zero.cu ; +run test_cuda_u16_countr_one.cu ; +run test_cuda_u16_popcount.cu ; +run test_cuda_u16_byteswap.cu ; +run test_cuda_u16_bitswap.cu ; + +# u32 bit tests +run test_cuda_u32_has_single_bit.cu ; +run test_cuda_u32_bit_ceil.cu ; +run test_cuda_u32_bit_floor.cu ; +run test_cuda_u32_bit_width.cu ; +run test_cuda_u32_rotl.cu ; +run test_cuda_u32_rotr.cu ; +run test_cuda_u32_countl_zero.cu ; +run test_cuda_u32_countl_one.cu ; +run test_cuda_u32_countr_zero.cu ; +run test_cuda_u32_countr_one.cu ; +run test_cuda_u32_popcount.cu ; +run test_cuda_u32_byteswap.cu ; +run test_cuda_u32_bitswap.cu ; + +# u64 bit tests +run test_cuda_u64_has_single_bit.cu ; +run test_cuda_u64_bit_ceil.cu ; +run test_cuda_u64_bit_floor.cu ; +run test_cuda_u64_bit_width.cu ; +run test_cuda_u64_rotl.cu ; +run test_cuda_u64_rotr.cu ; +run test_cuda_u64_countl_zero.cu ; +run test_cuda_u64_countl_one.cu ; +run test_cuda_u64_countr_zero.cu ; +run test_cuda_u64_countr_one.cu ; +run test_cuda_u64_popcount.cu ; +run test_cuda_u64_byteswap.cu ; +run test_cuda_u64_bitswap.cu ; + +# u128 bit tests +run test_cuda_u128_has_single_bit.cu ; +run test_cuda_u128_bit_ceil.cu ; +run test_cuda_u128_bit_floor.cu ; +run test_cuda_u128_bit_width.cu ; +run test_cuda_u128_rotl.cu ; +run test_cuda_u128_rotr.cu ; +run test_cuda_u128_countl_zero.cu ; +run test_cuda_u128_countl_one.cu ; +run test_cuda_u128_countr_zero.cu ; +run test_cuda_u128_countr_one.cu ; +run test_cuda_u128_popcount.cu ; +run test_cuda_u128_byteswap.cu ; +run test_cuda_u128_bitswap.cu 
; + +# Byte conversion tests + +# u8 byte conversion tests +run test_cuda_u8_to_be.cu ; +run test_cuda_u8_from_be.cu ; +run test_cuda_u8_to_le.cu ; +run test_cuda_u8_from_le.cu ; + +# u16 byte conversion tests +run test_cuda_u16_to_be.cu ; +run test_cuda_u16_from_be.cu ; +run test_cuda_u16_to_le.cu ; +run test_cuda_u16_from_le.cu ; + +# u32 byte conversion tests +run test_cuda_u32_to_be.cu ; +run test_cuda_u32_from_be.cu ; +run test_cuda_u32_to_le.cu ; +run test_cuda_u32_from_le.cu ; + +# u64 byte conversion tests +run test_cuda_u64_to_be.cu ; +run test_cuda_u64_from_be.cu ; +run test_cuda_u64_to_le.cu ; +run test_cuda_u64_from_le.cu ; + +# u128 byte conversion tests +run test_cuda_u128_to_be.cu ; +run test_cuda_u128_from_be.cu ; +run test_cuda_u128_to_le.cu ; +run test_cuda_u128_from_le.cu ; + +# Charconv tests + +# u8 charconv tests +run test_cuda_u8_charconv.cu ; +run test_cuda_u8_charconv_all_bases.cu ; + +# u16 charconv tests +run test_cuda_u16_charconv.cu ; +run test_cuda_u16_charconv_all_bases.cu ; + +# u32 charconv tests +run test_cuda_u32_charconv.cu ; +run test_cuda_u32_charconv_all_bases.cu ; + +# u64 charconv tests +run test_cuda_u64_charconv.cu ; +run test_cuda_u64_charconv_all_bases.cu ; + +# u128 charconv tests +run test_cuda_u128_charconv.cu ; +run test_cuda_u128_charconv_all_bases.cu ; + +# Integer utilities tests + +# u8 integer utilities tests +run test_cuda_u8_isqrt.cu ; +run test_cuda_u8_remove_trailing_zeros.cu ; +run test_cuda_u8_is_power_10.cu ; +run test_cuda_u8_is_power_2.cu ; +run test_cuda_u8_ilog2.cu ; +run test_cuda_u8_ilog10.cu ; +run test_cuda_u8_ilog.cu ; +run test_cuda_u8_ipow.cu ; +run test_cuda_u8_abs_diff.cu ; +run test_cuda_u8_div_ceil.cu ; +run test_cuda_u8_next_multiple_of.cu ; + +# u16 integer utilities tests +run test_cuda_u16_isqrt.cu ; +run test_cuda_u16_remove_trailing_zeros.cu ; +run test_cuda_u16_is_power_10.cu ; +run test_cuda_u16_is_power_2.cu ; +run test_cuda_u16_ilog2.cu ; +run test_cuda_u16_ilog10.cu ; +run 
test_cuda_u16_ilog.cu ; +run test_cuda_u16_ipow.cu ; +run test_cuda_u16_abs_diff.cu ; +run test_cuda_u16_div_ceil.cu ; +run test_cuda_u16_next_multiple_of.cu ; + +# u32 integer utilities tests +run test_cuda_u32_isqrt.cu ; +run test_cuda_u32_remove_trailing_zeros.cu ; +run test_cuda_u32_is_power_10.cu ; +run test_cuda_u32_is_power_2.cu ; +run test_cuda_u32_ilog2.cu ; +run test_cuda_u32_ilog10.cu ; +run test_cuda_u32_ilog.cu ; +run test_cuda_u32_ipow.cu ; +run test_cuda_u32_abs_diff.cu ; +run test_cuda_u32_div_ceil.cu ; +run test_cuda_u32_next_multiple_of.cu ; + +# u64 integer utilities tests +run test_cuda_u64_isqrt.cu ; +run test_cuda_u64_remove_trailing_zeros.cu ; +run test_cuda_u64_is_power_10.cu ; +run test_cuda_u64_is_power_2.cu ; +run test_cuda_u64_ilog2.cu ; +run test_cuda_u64_ilog10.cu ; +run test_cuda_u64_ilog.cu ; +run test_cuda_u64_ipow.cu ; +run test_cuda_u64_abs_diff.cu ; +run test_cuda_u64_div_ceil.cu ; +run test_cuda_u64_next_multiple_of.cu ; + +# u128 integer utilities tests +run test_cuda_u128_isqrt.cu ; +run test_cuda_u128_remove_trailing_zeros.cu ; +run test_cuda_u128_is_power_10.cu ; +run test_cuda_u128_is_power_2.cu ; +run test_cuda_u128_ilog2.cu ; +run test_cuda_u128_ilog10.cu ; +run test_cuda_u128_ilog.cu ; +run test_cuda_u128_ipow.cu ; +run test_cuda_u128_abs_diff.cu ; +run test_cuda_u128_div_ceil.cu ; +run test_cuda_u128_next_multiple_of.cu ; + +# Numeric tests + +# u8 numeric tests +run test_cuda_u8_gcd.cu ; +run test_cuda_u8_lcm.cu ; +run test_cuda_u8_midpoint.cu ; + +# u16 numeric tests +run test_cuda_u16_gcd.cu ; +run test_cuda_u16_lcm.cu ; +run test_cuda_u16_midpoint.cu ; + +# u32 numeric tests +run test_cuda_u32_gcd.cu ; +run test_cuda_u32_lcm.cu ; +run test_cuda_u32_midpoint.cu ; + +# u64 numeric tests +run test_cuda_u64_gcd.cu ; +run test_cuda_u64_lcm.cu ; +run test_cuda_u64_midpoint.cu ; + +# u128 numeric tests +run test_cuda_u128_gcd.cu ; +run test_cuda_u128_lcm.cu ; +run test_cuda_u128_midpoint.cu ; + +# Examples +run 
../examples/cuda.cu ; +run ../examples/cuda_error_handling.cu ; diff --git a/test/test_cuda_u128_abs_diff.cu b/test/test_cuda_u128_abs_diff.cu new file mode 100644 index 0000000..fb863d7 --- /dev/null +++ b/test/test_cuda_u128_abs_diff.cu @@ -0,0 +1,90 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::abs_diff(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + boost::random::uniform_int_distribution dist2{basis_type{0U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + 
boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::abs_diff(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_bit_ceil.cu b/test/test_cuda_u128_bit_ceil.cu new file mode 100644 index 0000000..52ca8ec --- /dev/null +++ b/test/test_cuda_u128_bit_ceil.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::bit_ceil(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)() / basis_type{2U}}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::bit_ceil(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_bit_floor.cu b/test/test_cuda_u128_bit_floor.cu new file mode 100644 index 0000000..b22b8ba --- /dev/null +++ b/test/test_cuda_u128_bit_floor.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::bit_floor(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + 
ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::bit_floor(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_bit_width.cu b/test/test_cuda_u128_bit_width.cu new file mode 100644 index 0000000..99ede69 --- /dev/null +++ b/test/test_cuda_u128_bit_width.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::bit_width(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + 
for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::bit_width(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_bitswap.cu b/test/test_cuda_u128_bitswap.cu new file mode 100644 index 0000000..636d746 --- /dev/null +++ b/test/test_cuda_u128_bitswap.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::bitswap(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::bitswap(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_byteswap.cu b/test/test_cuda_u128_byteswap.cu new file mode 100644 index 0000000..8c449f8 --- /dev/null +++ b/test/test_cuda_u128_byteswap.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::byteswap(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + 
ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::byteswap(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_charconv.cu b/test/test_cuda_u128_charconv.cu new file mode 100644 index 0000000..7a7e2b8 --- /dev/null +++ b/test/test_cuda_u128_charconv.cu @@ -0,0 +1,95 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char buf[64] {}; + auto tc_result {boost::charconv::to_chars(buf, buf + sizeof(buf), in[i])}; + test_type parsed {}; + boost::charconv::from_chars(buf, tc_result.ptr, parsed); + out[i] = parsed; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr 
input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + char buf[64] {}; + auto tc_result {boost::charconv::to_chars(buf, buf + sizeof(buf), input_vector[i])}; + test_type parsed {}; + boost::charconv::from_chars(buf, tc_result.ptr, parsed); + results.push_back(parsed); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_charconv_all_bases.cu b/test/test_cuda_u128_charconv_all_bases.cu new file mode 100644 index 0000000..762ba5a --- /dev/null +++ b/test/test_cuda_u128_charconv_all_bases.cu @@ -0,0 +1,111 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + int pass_count {0}; + for (int base = 2; base <= 36; ++base) + { + char buf[256] {}; + auto tc_result {boost::charconv::to_chars(buf, buf + sizeof(buf), in[i], base)}; + test_type parsed {}; + boost::charconv::from_chars(buf, tc_result.ptr, parsed, base); + if (parsed == in[i]) + { + ++pass_count; + } + } + out[i] = pass_count; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + int pass_count 
{0}; + for (int base = 2; base <= 36; ++base) + { + char buf[256] {}; + auto tc_result {boost::charconv::to_chars(buf, buf + sizeof(buf), input_vector[i], base)}; + test_type parsed {}; + boost::charconv::from_chars(buf, tc_result.ptr, parsed, base); + if (parsed == input_vector[i]) + { + ++pass_count; + } + } + results.push_back(pass_count); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_countl_one.cu b/test/test_cuda_u128_countl_one.cu new file mode 100644 index 0000000..06caaef --- /dev/null +++ b/test/test_cuda_u128_countl_one.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::countl_one(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::countl_one(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_countl_zero.cu b/test/test_cuda_u128_countl_zero.cu new file mode 100644 index 0000000..74cbfe6 --- /dev/null +++ b/test/test_cuda_u128_countl_zero.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::countl_zero(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + 
ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::countl_zero(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_countr_one.cu b/test/test_cuda_u128_countr_one.cu new file mode 100644 index 0000000..8fa6fa7 --- /dev/null +++ b/test/test_cuda_u128_countr_one.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::countr_one(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, 
(std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::countr_one(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_countr_zero.cu b/test/test_cuda_u128_countr_zero.cu new file mode 100644 index 0000000..1fd6114 --- /dev/null +++ b/test/test_cuda_u128_countr_zero.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::countr_zero(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::countr_zero(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_div_ceil.cu b/test/test_cuda_u128_div_ceil.cu new file mode 100644 index 0000000..a23045e --- /dev/null +++ b/test/test_cuda_u128_div_ceil.cu @@ -0,0 +1,90 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::div_ceil(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + boost::random::uniform_int_distribution dist2{basis_type{1U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << 
blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::div_ceil(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_from_be.cu b/test/test_cuda_u128_from_be.cu new file mode 100644 index 0000000..56476d9 --- /dev/null +++ b/test/test_cuda_u128_from_be.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::from_be(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::from_be(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_from_le.cu b/test/test_cuda_u128_from_le.cu new file mode 100644 index 0000000..4558a42 --- /dev/null +++ b/test/test_cuda_u128_from_le.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::from_le(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + 
ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::from_le(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_gcd.cu b/test/test_cuda_u128_gcd.cu new file mode 100644 index 0000000..d39fb2d --- /dev/null +++ b/test/test_cuda_u128_gcd.cu @@ -0,0 +1,90 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::gcd(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + 
boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + boost::random::uniform_int_distribution dist2{basis_type{0U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::gcd(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_has_single_bit.cu b/test/test_cuda_u128_has_single_bit.cu new file mode 100644 index 0000000..775398f --- /dev/null +++ b/test/test_cuda_u128_has_single_bit.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = static_cast(boost::safe_numbers::has_single_bit(in[i])); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(static_cast(boost::safe_numbers::has_single_bit(input_vector[i]))); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_ilog.cu b/test/test_cuda_u128_ilog.cu new file mode 100644 index 0000000..4b61aa3 --- /dev/null +++ b/test/test_cuda_u128_ilog.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::ilog(in[i], test_type{static_cast(7)}); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{1U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + 
ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::ilog(input_vector[i], test_type{static_cast(7)})); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_ilog10.cu b/test/test_cuda_u128_ilog10.cu new file mode 100644 index 0000000..7c2f731 --- /dev/null +++ b/test/test_cuda_u128_ilog10.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::ilog10(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{1U}, 
(std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::ilog10(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_ilog2.cu b/test/test_cuda_u128_ilog2.cu new file mode 100644 index 0000000..2764202 --- /dev/null +++ b/test/test_cuda_u128_ilog2.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::ilog2(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{1U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::ilog2(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_ipow.cu b/test/test_cuda_u128_ipow.cu new file mode 100644 index 0000000..4caada7 --- /dev/null +++ b/test/test_cuda_u128_ipow.cu @@ -0,0 +1,90 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::ipow(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, basis_type{10U}}; + boost::random::uniform_int_distribution dist2{basis_type{0U}, basis_type{2U}}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << 
threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::ipow(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_is_power_10.cu b/test/test_cuda_u128_is_power_10.cu new file mode 100644 index 0000000..5d7048f --- /dev/null +++ b/test/test_cuda_u128_is_power_10.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = static_cast(boost::safe_numbers::is_power_10(in[i])); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{1U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(static_cast(boost::safe_numbers::is_power_10(input_vector[i]))); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_is_power_2.cu b/test/test_cuda_u128_is_power_2.cu new file mode 100644 index 0000000..2775f07 --- /dev/null +++ b/test/test_cuda_u128_is_power_2.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = static_cast(boost::safe_numbers::is_power_2(in[i])); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), 
numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(static_cast(boost::safe_numbers::is_power_2(input_vector[i]))); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_isqrt.cu b/test/test_cuda_u128_isqrt.cu new file mode 100644 index 0000000..a54b27f --- /dev/null +++ b/test/test_cuda_u128_isqrt.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::isqrt(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, 
(std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::isqrt(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_lcm.cu b/test/test_cuda_u128_lcm.cu new file mode 100644 index 0000000..ba04d7e --- /dev/null +++ b/test/test_cuda_u128_lcm.cu @@ -0,0 +1,90 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::lcm(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{1U}, basis_type{10U}}; + boost::random::uniform_int_distribution dist2{basis_type{1U}, basis_type{10U}}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::lcm(input_vector1[i], input_vector2[i])); 
+ } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_midpoint.cu b/test/test_cuda_u128_midpoint.cu new file mode 100644 index 0000000..00ea8d2 --- /dev/null +++ b/test/test_cuda_u128_midpoint.cu @@ -0,0 +1,90 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::midpoint(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + boost::random::uniform_int_distribution dist2{basis_type{0U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] 
= test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::midpoint(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_next_multiple_of.cu b/test/test_cuda_u128_next_multiple_of.cu new file mode 100644 index 0000000..313092a --- /dev/null +++ b/test/test_cuda_u128_next_multiple_of.cu @@ -0,0 +1,90 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::next_multiple_of(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)() / basis_type{2U}}; + boost::random::uniform_int_distribution dist2{basis_type{1U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + 
results.push_back(boost::safe_numbers::next_multiple_of(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_popcount.cu b/test/test_cuda_u128_popcount.cu new file mode 100644 index 0000000..ad62bd6 --- /dev/null +++ b/test/test_cuda_u128_popcount.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::popcount(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / 
threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::popcount(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_remove_trailing_zeros.cu b/test/test_cuda_u128_remove_trailing_zeros.cu new file mode 100644 index 0000000..5ef04b7 --- /dev/null +++ b/test/test_cuda_u128_remove_trailing_zeros.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = test_type{boost::safe_numbers::remove_trailing_zeros(in[i]).trimmed_number}; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{1U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(test_type{boost::safe_numbers::remove_trailing_zeros(input_vector[i]).trimmed_number}); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result 
verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_rotl.cu b/test/test_cuda_u128_rotl.cu new file mode 100644 index 0000000..3b33dd7 --- /dev/null +++ b/test/test_cuda_u128_rotl.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::rotl(in[i], 3); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), 
output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::rotl(input_vector[i], 3)); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_rotr.cu b/test/test_cuda_u128_rotr.cu new file mode 100644 index 0000000..6e259de --- /dev/null +++ b/test/test_cuda_u128_rotr.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::rotr(in[i], 3); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, 
(std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::rotr(input_vector[i], 3)); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_to_be.cu b/test/test_cuda_u128_to_be.cu new file mode 100644 index 0000000..0e1e6cf --- /dev/null +++ b/test/test_cuda_u128_to_be.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::to_be(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::to_be(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u128_to_le.cu b/test/test_cuda_u128_to_le.cu new file mode 100644 index 0000000..461d47d --- /dev/null +++ b/test/test_cuda_u128_to_le.cu @@ -0,0 +1,87 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::to_le(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{basis_type{0U}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + 
+ std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::to_le(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_abs_diff.cu b/test/test_cuda_u16_abs_diff.cu new file mode 100644 index 0000000..830196a --- /dev/null +++ b/test/test_cuda_u16_abs_diff.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::abs_diff(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + std::uniform_int_distribution dist2{0U, static_cast((std::numeric_limits::max)())}; + 
for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist2(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::abs_diff(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_bit_ceil.cu b/test/test_cuda_u16_bit_ceil.cu new file mode 100644 index 0000000..a8ddfb7 --- /dev/null +++ b/test/test_cuda_u16_bit_ceil.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::bit_ceil(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)()) / 2U}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::bit_ceil(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_bit_floor.cu b/test/test_cuda_u16_bit_floor.cu new file mode 100644 index 0000000..ef63dbe --- /dev/null +++ b/test/test_cuda_u16_bit_floor.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::bit_floor(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + 
results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::bit_floor(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_bit_width.cu b/test/test_cuda_u16_bit_width.cu new file mode 100644 index 0000000..2085fc5 --- /dev/null +++ b/test/test_cuda_u16_bit_width.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::bit_width(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << 
blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::bit_width(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_bitswap.cu b/test/test_cuda_u16_bitswap.cu new file mode 100644 index 0000000..db46116 --- /dev/null +++ b/test/test_cuda_u16_bitswap.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::bitswap(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::bitswap(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_byteswap.cu b/test/test_cuda_u16_byteswap.cu new file mode 100644 index 0000000..a9fcb6d --- /dev/null +++ b/test/test_cuda_u16_byteswap.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::byteswap(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + 
results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::byteswap(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_charconv.cu b/test/test_cuda_u16_charconv.cu new file mode 100644 index 0000000..cd53a19 --- /dev/null +++ b/test/test_cuda_u16_charconv.cu @@ -0,0 +1,90 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char buf[64] {}; + auto tc_result {boost::charconv::to_chars(buf, buf + sizeof(buf), in[i])}; + test_type parsed {}; + boost::charconv::from_chars(buf, tc_result.ptr, parsed); + out[i] = parsed; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int 
threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + char buf[64] {}; + auto tc_result {boost::charconv::to_chars(buf, buf + sizeof(buf), input_vector[i])}; + test_type parsed {}; + boost::charconv::from_chars(buf, tc_result.ptr, parsed); + results.push_back(parsed); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_charconv_all_bases.cu b/test/test_cuda_u16_charconv_all_bases.cu new file mode 100644 index 0000000..50f64a7 --- /dev/null +++ b/test/test_cuda_u16_charconv_all_bases.cu @@ -0,0 +1,106 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + int pass_count {0}; + for (int base = 2; base <= 36; ++base) + { + char buf[256] {}; + auto tc_result {boost::charconv::to_chars(buf, buf + sizeof(buf), in[i], base)}; + test_type parsed {}; + boost::charconv::from_chars(buf, tc_result.ptr, parsed, base); + if (parsed == in[i]) + { + ++pass_count; + } + } + out[i] = pass_count; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + int pass_count {0}; + for (int base = 2; base <= 36; ++base) + { + char buf[256] {}; + auto tc_result {boost::charconv::to_chars(buf, buf + 
sizeof(buf), input_vector[i], base)}; + test_type parsed {}; + boost::charconv::from_chars(buf, tc_result.ptr, parsed, base); + if (parsed == input_vector[i]) + { + ++pass_count; + } + } + results.push_back(pass_count); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_countl_one.cu b/test/test_cuda_u16_countl_one.cu new file mode 100644 index 0000000..1fcc61c --- /dev/null +++ b/test/test_cuda_u16_countl_one.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::countl_one(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / 
threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::countl_one(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_countl_zero.cu b/test/test_cuda_u16_countl_zero.cu new file mode 100644 index 0000000..e78dc50 --- /dev/null +++ b/test/test_cuda_u16_countl_zero.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::countl_zero(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::countl_zero(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_countr_one.cu b/test/test_cuda_u16_countr_one.cu new file mode 100644 index 0000000..f900927 --- /dev/null +++ b/test/test_cuda_u16_countr_one.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::countr_one(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + 
results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::countr_one(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_countr_zero.cu b/test/test_cuda_u16_countr_zero.cu new file mode 100644 index 0000000..4feddc9 --- /dev/null +++ b/test/test_cuda_u16_countr_zero.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::countr_zero(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << 
blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::countr_zero(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_div_ceil.cu b/test/test_cuda_u16_div_ceil.cu new file mode 100644 index 0000000..52013ad --- /dev/null +++ b/test/test_cuda_u16_div_ceil.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::div_ceil(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + std::uniform_int_distribution dist2{1U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist2(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::div_ceil(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if 
(output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_from_be.cu b/test/test_cuda_u16_from_be.cu new file mode 100644 index 0000000..1eda1cd --- /dev/null +++ b/test/test_cuda_u16_from_be.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::from_be(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << 
"CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::from_be(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_from_le.cu b/test/test_cuda_u16_from_le.cu new file mode 100644 index 0000000..389a8fd --- /dev/null +++ b/test/test_cuda_u16_from_le.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::from_le(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + 
threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::from_le(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_gcd.cu b/test/test_cuda_u16_gcd.cu new file mode 100644 index 0000000..0bff419 --- /dev/null +++ b/test/test_cuda_u16_gcd.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::gcd(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + std::uniform_int_distribution dist2{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist2(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::gcd(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if 
(output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_has_single_bit.cu b/test/test_cuda_u16_has_single_bit.cu new file mode 100644 index 0000000..39bb369 --- /dev/null +++ b/test/test_cuda_u16_has_single_bit.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = static_cast(boost::safe_numbers::has_single_bit(in[i])); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + 
ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(static_cast(boost::safe_numbers::has_single_bit(input_vector[i]))); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_ilog.cu b/test/test_cuda_u16_ilog.cu new file mode 100644 index 0000000..39bf5de --- /dev/null +++ b/test/test_cuda_u16_ilog.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::ilog(in[i], test_type{static_cast(7)}); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{1U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int 
threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::ilog(input_vector[i], test_type{static_cast(7)})); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_ilog10.cu b/test/test_cuda_u16_ilog10.cu new file mode 100644 index 0000000..3eff426 --- /dev/null +++ b/test/test_cuda_u16_ilog10.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::ilog10(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{1U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::ilog10(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_ilog2.cu b/test/test_cuda_u16_ilog2.cu new file mode 100644 index 0000000..23696c4 --- /dev/null +++ b/test/test_cuda_u16_ilog2.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::ilog2(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{1U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + 
w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::ilog2(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_ipow.cu b/test/test_cuda_u16_ipow.cu new file mode 100644 index 0000000..a45070a --- /dev/null +++ b/test/test_cuda_u16_ipow.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::ipow(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, 10U}; + std::uniform_int_distribution dist2{0U, 2U}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist2(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + 
threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::ipow(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_is_power_10.cu b/test/test_cuda_u16_is_power_10.cu new file mode 100644 index 0000000..1d680da --- /dev/null +++ b/test/test_cuda_u16_is_power_10.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = static_cast(boost::safe_numbers::is_power_10(in[i])); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{1U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(static_cast(boost::safe_numbers::is_power_10(input_vector[i]))); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_is_power_2.cu b/test/test_cuda_u16_is_power_2.cu new file mode 100644 index 0000000..e53ae2c --- /dev/null +++ b/test/test_cuda_u16_is_power_2.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = static_cast(boost::safe_numbers::is_power_2(in[i])); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + 
results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(static_cast(boost::safe_numbers::is_power_2(input_vector[i]))); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_isqrt.cu b/test/test_cuda_u16_isqrt.cu new file mode 100644 index 0000000..7f9708e --- /dev/null +++ b/test/test_cuda_u16_isqrt.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::isqrt(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << 
blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::isqrt(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_lcm.cu b/test/test_cuda_u16_lcm.cu new file mode 100644 index 0000000..2f2f420 --- /dev/null +++ b/test/test_cuda_u16_lcm.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::lcm(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{1U, 10U}; + std::uniform_int_distribution dist2{1U, 10U}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist2(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::lcm(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element 
" << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_midpoint.cu b/test/test_cuda_u16_midpoint.cu new file mode 100644 index 0000000..136056b --- /dev/null +++ b/test/test_cuda_u16_midpoint.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::midpoint(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + std::uniform_int_distribution dist2{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist2(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + 
boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::midpoint(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_next_multiple_of.cu b/test/test_cuda_u16_next_multiple_of.cu new file mode 100644 index 0000000..f597589 --- /dev/null +++ b/test/test_cuda_u16_next_multiple_of.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::next_multiple_of(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)()) / 2U}; + std::uniform_int_distribution dist2{1U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist2(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::next_multiple_of(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < 
numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_popcount.cu b/test/test_cuda_u16_popcount.cu new file mode 100644 index 0000000..b883bf3 --- /dev/null +++ b/test/test_cuda_u16_popcount.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::popcount(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + 
ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::popcount(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_remove_trailing_zeros.cu b/test/test_cuda_u16_remove_trailing_zeros.cu new file mode 100644 index 0000000..85559e2 --- /dev/null +++ b/test/test_cuda_u16_remove_trailing_zeros.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = test_type{boost::safe_numbers::remove_trailing_zeros(in[i]).trimmed_number}; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{1U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = 
test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(test_type{boost::safe_numbers::remove_trailing_zeros(input_vector[i]).trimmed_number}); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_rotl.cu b/test/test_cuda_u16_rotl.cu new file mode 100644 index 0000000..94f331a --- /dev/null +++ b/test/test_cuda_u16_rotl.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::rotl(in[i], 3); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::rotl(input_vector[i], 3)); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_rotr.cu b/test/test_cuda_u16_rotr.cu new file mode 100644 index 0000000..eeda3d0 --- /dev/null +++ b/test/test_cuda_u16_rotr.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::rotr(in[i], 3); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + 
results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::rotr(input_vector[i], 3)); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_to_be.cu b/test/test_cuda_u16_to_be.cu new file mode 100644 index 0000000..9268e37 --- /dev/null +++ b/test/test_cuda_u16_to_be.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::to_be(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " 
blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::to_be(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u16_to_le.cu b/test/test_cuda_u16_to_le.cu new file mode 100644 index 0000000..7b0ce48 --- /dev/null +++ b/test/test_cuda_u16_to_le.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::to_le(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::to_le(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_abs_diff.cu b/test/test_cuda_u32_abs_diff.cu new file mode 100644 index 0000000..213bfe8 --- /dev/null +++ b/test/test_cuda_u32_abs_diff.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::abs_diff(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + std::uniform_int_distribution dist2{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + 
cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::abs_diff(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_bit_ceil.cu b/test/test_cuda_u32_bit_ceil.cu new file mode 100644 index 0000000..f1b130a --- /dev/null +++ b/test/test_cuda_u32_bit_ceil.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::bit_ceil(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)() / basis_type{2}}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::bit_ceil(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_bit_floor.cu b/test/test_cuda_u32_bit_floor.cu new file mode 100644 index 0000000..987c4ba --- /dev/null +++ b/test/test_cuda_u32_bit_floor.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::bit_floor(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + 
results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::bit_floor(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_bit_width.cu b/test/test_cuda_u32_bit_width.cu new file mode 100644 index 0000000..4ea5784 --- /dev/null +++ b/test/test_cuda_u32_bit_width.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::bit_width(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks 
of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::bit_width(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_bitswap.cu b/test/test_cuda_u32_bitswap.cu new file mode 100644 index 0000000..2c8c5b2 --- /dev/null +++ b/test/test_cuda_u32_bitswap.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::bitswap(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::bitswap(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_byteswap.cu b/test/test_cuda_u32_byteswap.cu new file mode 100644 index 0000000..300e1d9 --- /dev/null +++ b/test/test_cuda_u32_byteswap.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::byteswap(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); 
+ w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::byteswap(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_charconv.cu b/test/test_cuda_u32_charconv.cu new file mode 100644 index 0000000..b618742 --- /dev/null +++ b/test/test_cuda_u32_charconv.cu @@ -0,0 +1,90 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char buf[64] {}; + auto tc_result {boost::charconv::to_chars(buf, buf + sizeof(buf), in[i])}; + test_type parsed {}; + boost::charconv::from_chars(buf, tc_result.ptr, parsed); + out[i] = parsed; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = 
(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + char buf[64] {}; + auto tc_result {boost::charconv::to_chars(buf, buf + sizeof(buf), input_vector[i])}; + test_type parsed {}; + boost::charconv::from_chars(buf, tc_result.ptr, parsed); + results.push_back(parsed); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_charconv_all_bases.cu b/test/test_cuda_u32_charconv_all_bases.cu new file mode 100644 index 0000000..70c43e7 --- /dev/null +++ b/test/test_cuda_u32_charconv_all_bases.cu @@ -0,0 +1,106 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + int pass_count {0}; + for (int base = 2; base <= 36; ++base) + { + char buf[256] {}; + auto tc_result {boost::charconv::to_chars(buf, buf + sizeof(buf), in[i], base)}; + test_type parsed {}; + boost::charconv::from_chars(buf, tc_result.ptr, parsed, base); + if (parsed == in[i]) + { + ++pass_count; + } + } + out[i] = pass_count; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + int pass_count {0}; + for (int base = 2; base <= 36; ++base) + { + char buf[256] {}; + auto tc_result {boost::charconv::to_chars(buf, buf + sizeof(buf), 
input_vector[i], base)}; + test_type parsed {}; + boost::charconv::from_chars(buf, tc_result.ptr, parsed, base); + if (parsed == input_vector[i]) + { + ++pass_count; + } + } + results.push_back(pass_count); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_countl_one.cu b/test/test_cuda_u32_countl_one.cu new file mode 100644 index 0000000..b5d40e5 --- /dev/null +++ b/test/test_cuda_u32_countl_one.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::countl_one(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << 
"CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::countl_one(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_countl_zero.cu b/test/test_cuda_u32_countl_zero.cu new file mode 100644 index 0000000..f5b4284 --- /dev/null +++ b/test/test_cuda_u32_countl_zero.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::countl_zero(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::countl_zero(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_countr_one.cu b/test/test_cuda_u32_countr_one.cu new file mode 100644 index 0000000..3e687cb --- /dev/null +++ b/test/test_cuda_u32_countr_one.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::countr_one(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + 
results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::countr_one(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_countr_zero.cu b/test/test_cuda_u32_countr_zero.cu new file mode 100644 index 0000000..99028ef --- /dev/null +++ b/test/test_cuda_u32_countr_zero.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::countr_zero(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " 
blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::countr_zero(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_div_ceil.cu b/test/test_cuda_u32_div_ceil.cu new file mode 100644 index 0000000..83def48 --- /dev/null +++ b/test/test_cuda_u32_div_ceil.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::div_ceil(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + std::uniform_int_distribution dist2{basis_type{1}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::div_ceil(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + 
{ + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_from_be.cu b/test/test_cuda_u32_from_be.cu new file mode 100644 index 0000000..409cdda --- /dev/null +++ b/test/test_cuda_u32_from_be.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::from_be(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << 
std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::from_be(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_from_le.cu b/test/test_cuda_u32_from_le.cu new file mode 100644 index 0000000..358f4f4 --- /dev/null +++ b/test/test_cuda_u32_from_le.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::from_le(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch 
with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::from_le(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_gcd.cu b/test/test_cuda_u32_gcd.cu new file mode 100644 index 0000000..0ca6178 --- /dev/null +++ b/test/test_cuda_u32_gcd.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::gcd(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + std::uniform_int_distribution dist2{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::gcd(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + 
std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_has_single_bit.cu b/test/test_cuda_u32_has_single_bit.cu new file mode 100644 index 0000000..308d0a6 --- /dev/null +++ b/test/test_cuda_u32_has_single_bit.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = static_cast(boost::safe_numbers::has_single_bit(in[i])); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done 
in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(static_cast(boost::safe_numbers::has_single_bit(input_vector[i]))); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_ilog.cu b/test/test_cuda_u32_ilog.cu new file mode 100644 index 0000000..b98cd7b --- /dev/null +++ b/test/test_cuda_u32_ilog.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::ilog(in[i], test_type{static_cast(7)}); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{1}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 
1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::ilog(input_vector[i], test_type{static_cast(7)})); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_ilog10.cu b/test/test_cuda_u32_ilog10.cu new file mode 100644 index 0000000..9302d56 --- /dev/null +++ b/test/test_cuda_u32_ilog10.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::ilog10(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{1}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::ilog10(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_ilog2.cu b/test/test_cuda_u32_ilog2.cu new file mode 100644 index 0000000..85b2e9d --- /dev/null +++ b/test/test_cuda_u32_ilog2.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::ilog2(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{1}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for 
(int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::ilog2(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_ipow.cu b/test/test_cuda_u32_ipow.cu new file mode 100644 index 0000000..e8c1f1d --- /dev/null +++ b/test/test_cuda_u32_ipow.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::ipow(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, basis_type{10}}; + std::uniform_int_distribution dist2{basis_type{0}, basis_type{2}}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + 
threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::ipow(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_is_power_10.cu b/test/test_cuda_u32_is_power_10.cu new file mode 100644 index 0000000..3d8b03c --- /dev/null +++ b/test/test_cuda_u32_is_power_10.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = static_cast(boost::safe_numbers::is_power_10(in[i])); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{1}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(static_cast(boost::safe_numbers::is_power_10(input_vector[i]))); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_is_power_2.cu b/test/test_cuda_u32_is_power_2.cu new file mode 100644 index 0000000..381e674 --- /dev/null +++ b/test/test_cuda_u32_is_power_2.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = static_cast(boost::safe_numbers::is_power_2(in[i])); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + 
results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(static_cast(boost::safe_numbers::is_power_2(input_vector[i]))); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_isqrt.cu b/test/test_cuda_u32_isqrt.cu new file mode 100644 index 0000000..a6fcb8c --- /dev/null +++ b/test/test_cuda_u32_isqrt.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::isqrt(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " 
blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::isqrt(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_lcm.cu b/test/test_cuda_u32_lcm.cu new file mode 100644 index 0000000..09019b9 --- /dev/null +++ b/test/test_cuda_u32_lcm.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::lcm(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{1}, basis_type{10}}; + std::uniform_int_distribution dist2{basis_type{1}, basis_type{10}}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::lcm(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification 
failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_midpoint.cu b/test/test_cuda_u32_midpoint.cu new file mode 100644 index 0000000..aadfcac --- /dev/null +++ b/test/test_cuda_u32_midpoint.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::midpoint(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + std::uniform_int_distribution dist2{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context 
ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::midpoint(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_next_multiple_of.cu b/test/test_cuda_u32_next_multiple_of.cu new file mode 100644 index 0000000..3371948 --- /dev/null +++ b/test/test_cuda_u32_next_multiple_of.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::next_multiple_of(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)() / basis_type{2}}; + std::uniform_int_distribution dist2{basis_type{1}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::next_multiple_of(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if 
(output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_popcount.cu b/test/test_cuda_u32_popcount.cu new file mode 100644 index 0000000..1b2678c --- /dev/null +++ b/test/test_cuda_u32_popcount.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::popcount(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done 
in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::popcount(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_remove_trailing_zeros.cu b/test/test_cuda_u32_remove_trailing_zeros.cu new file mode 100644 index 0000000..1e8c9e4 --- /dev/null +++ b/test/test_cuda_u32_remove_trailing_zeros.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = test_type{boost::safe_numbers::remove_trailing_zeros(in[i]).trimmed_number}; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{1}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + 
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(test_type{boost::safe_numbers::remove_trailing_zeros(input_vector[i]).trimmed_number}); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_rotl.cu b/test/test_cuda_u32_rotl.cu new file mode 100644 index 0000000..ed06774 --- /dev/null +++ b/test/test_cuda_u32_rotl.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::rotl(in[i], 3); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::rotl(input_vector[i], 3)); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_rotr.cu b/test/test_cuda_u32_rotr.cu new file mode 100644 index 0000000..9b6b7a2 --- /dev/null +++ b/test/test_cuda_u32_rotr.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::rotr(in[i], 3); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); 
+ for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::rotr(input_vector[i], 3)); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_to_be.cu b/test/test_cuda_u32_to_be.cu new file mode 100644 index 0000000..abf22d7 --- /dev/null +++ b/test/test_cuda_u32_to_be.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::to_be(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + 
boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::to_be(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u32_to_le.cu b/test/test_cuda_u32_to_le.cu new file mode 100644 index 0000000..2d31f30 --- /dev/null +++ b/test/test_cuda_u32_to_le.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::to_le(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::to_le(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_abs_diff.cu b/test/test_cuda_u64_abs_diff.cu new file mode 100644 index 0000000..038f420 --- /dev/null +++ b/test/test_cuda_u64_abs_diff.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::abs_diff(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + std::uniform_int_distribution dist2{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + 
cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::abs_diff(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_bit_ceil.cu b/test/test_cuda_u64_bit_ceil.cu new file mode 100644 index 0000000..885f44c --- /dev/null +++ b/test/test_cuda_u64_bit_ceil.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::bit_ceil(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)() / basis_type{2}}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::bit_ceil(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_bit_floor.cu b/test/test_cuda_u64_bit_floor.cu new file mode 100644 index 0000000..18b61b0 --- /dev/null +++ b/test/test_cuda_u64_bit_floor.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::bit_floor(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + 
results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::bit_floor(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_bit_width.cu b/test/test_cuda_u64_bit_width.cu new file mode 100644 index 0000000..ec04975 --- /dev/null +++ b/test/test_cuda_u64_bit_width.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::bit_width(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks 
of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::bit_width(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_bitswap.cu b/test/test_cuda_u64_bitswap.cu new file mode 100644 index 0000000..ca98035 --- /dev/null +++ b/test/test_cuda_u64_bitswap.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::bitswap(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::bitswap(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_byteswap.cu b/test/test_cuda_u64_byteswap.cu new file mode 100644 index 0000000..e07beb1 --- /dev/null +++ b/test/test_cuda_u64_byteswap.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::byteswap(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); 
+ w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::byteswap(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_charconv.cu b/test/test_cuda_u64_charconv.cu new file mode 100644 index 0000000..75998ff --- /dev/null +++ b/test/test_cuda_u64_charconv.cu @@ -0,0 +1,90 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char buf[64] {}; + auto tc_result {boost::charconv::to_chars(buf, buf + sizeof(buf), in[i])}; + test_type parsed {}; + boost::charconv::from_chars(buf, tc_result.ptr, parsed); + out[i] = parsed; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = 
(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + char buf[64] {}; + auto tc_result {boost::charconv::to_chars(buf, buf + sizeof(buf), input_vector[i])}; + test_type parsed {}; + boost::charconv::from_chars(buf, tc_result.ptr, parsed); + results.push_back(parsed); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_charconv_all_bases.cu b/test/test_cuda_u64_charconv_all_bases.cu new file mode 100644 index 0000000..5de01ea --- /dev/null +++ b/test/test_cuda_u64_charconv_all_bases.cu @@ -0,0 +1,106 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + int pass_count {0}; + for (int base = 2; base <= 36; ++base) + { + char buf[256] {}; + auto tc_result {boost::charconv::to_chars(buf, buf + sizeof(buf), in[i], base)}; + test_type parsed {}; + boost::charconv::from_chars(buf, tc_result.ptr, parsed, base); + if (parsed == in[i]) + { + ++pass_count; + } + } + out[i] = pass_count; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + int pass_count {0}; + for (int base = 2; base <= 36; ++base) + { + char buf[256] {}; + auto tc_result {boost::charconv::to_chars(buf, buf + sizeof(buf), 
input_vector[i], base)}; + test_type parsed {}; + boost::charconv::from_chars(buf, tc_result.ptr, parsed, base); + if (parsed == input_vector[i]) + { + ++pass_count; + } + } + results.push_back(pass_count); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_countl_one.cu b/test/test_cuda_u64_countl_one.cu new file mode 100644 index 0000000..4f52634 --- /dev/null +++ b/test/test_cuda_u64_countl_one.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::countl_one(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << 
"CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::countl_one(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_countl_zero.cu b/test/test_cuda_u64_countl_zero.cu new file mode 100644 index 0000000..81d3d67 --- /dev/null +++ b/test/test_cuda_u64_countl_zero.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::countl_zero(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::countl_zero(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_countr_one.cu b/test/test_cuda_u64_countr_one.cu new file mode 100644 index 0000000..de86742 --- /dev/null +++ b/test/test_cuda_u64_countr_one.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::countr_one(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + 
results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::countr_one(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_countr_zero.cu b/test/test_cuda_u64_countr_zero.cu new file mode 100644 index 0000000..c348275 --- /dev/null +++ b/test/test_cuda_u64_countr_zero.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::countr_zero(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " 
blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::countr_zero(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_div_ceil.cu b/test/test_cuda_u64_div_ceil.cu new file mode 100644 index 0000000..4bd8b1b --- /dev/null +++ b/test/test_cuda_u64_div_ceil.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::div_ceil(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + std::uniform_int_distribution dist2{basis_type{1}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::div_ceil(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + 
{ + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_from_be.cu b/test/test_cuda_u64_from_be.cu new file mode 100644 index 0000000..e867176 --- /dev/null +++ b/test/test_cuda_u64_from_be.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::from_be(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << 
std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::from_be(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_from_le.cu b/test/test_cuda_u64_from_le.cu new file mode 100644 index 0000000..29e4024 --- /dev/null +++ b/test/test_cuda_u64_from_le.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::from_le(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch 
with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::from_le(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_gcd.cu b/test/test_cuda_u64_gcd.cu new file mode 100644 index 0000000..6d0dc83 --- /dev/null +++ b/test/test_cuda_u64_gcd.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::gcd(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + std::uniform_int_distribution dist2{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::gcd(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + 
std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_has_single_bit.cu b/test/test_cuda_u64_has_single_bit.cu new file mode 100644 index 0000000..76012cd --- /dev/null +++ b/test/test_cuda_u64_has_single_bit.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = static_cast(boost::safe_numbers::has_single_bit(in[i])); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done 
in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(static_cast(boost::safe_numbers::has_single_bit(input_vector[i]))); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_ilog.cu b/test/test_cuda_u64_ilog.cu new file mode 100644 index 0000000..430ae89 --- /dev/null +++ b/test/test_cuda_u64_ilog.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::ilog(in[i], test_type{static_cast(7)}); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{1}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 
1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::ilog(input_vector[i], test_type{static_cast(7)})); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_ilog10.cu b/test/test_cuda_u64_ilog10.cu new file mode 100644 index 0000000..2c67863 --- /dev/null +++ b/test/test_cuda_u64_ilog10.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::ilog10(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{1}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::ilog10(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_ilog2.cu b/test/test_cuda_u64_ilog2.cu new file mode 100644 index 0000000..375c119 --- /dev/null +++ b/test/test_cuda_u64_ilog2.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::ilog2(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{1}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for 
(int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::ilog2(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_ipow.cu b/test/test_cuda_u64_ipow.cu new file mode 100644 index 0000000..be09471 --- /dev/null +++ b/test/test_cuda_u64_ipow.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::ipow(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, basis_type{10}}; + std::uniform_int_distribution dist2{basis_type{0}, basis_type{2}}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + 
threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::ipow(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_is_power_10.cu b/test/test_cuda_u64_is_power_10.cu new file mode 100644 index 0000000..0f2fb55 --- /dev/null +++ b/test/test_cuda_u64_is_power_10.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = static_cast(boost::safe_numbers::is_power_10(in[i])); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{1}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(static_cast(boost::safe_numbers::is_power_10(input_vector[i]))); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_is_power_2.cu b/test/test_cuda_u64_is_power_2.cu new file mode 100644 index 0000000..c823c6f --- /dev/null +++ b/test/test_cuda_u64_is_power_2.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = static_cast(boost::safe_numbers::is_power_2(in[i])); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + 
results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(static_cast(boost::safe_numbers::is_power_2(input_vector[i]))); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_isqrt.cu b/test/test_cuda_u64_isqrt.cu new file mode 100644 index 0000000..ba5a5ae --- /dev/null +++ b/test/test_cuda_u64_isqrt.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::isqrt(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " 
blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::isqrt(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_lcm.cu b/test/test_cuda_u64_lcm.cu new file mode 100644 index 0000000..24dce83 --- /dev/null +++ b/test/test_cuda_u64_lcm.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::lcm(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{1}, basis_type{10}}; + std::uniform_int_distribution dist2{basis_type{1}, basis_type{10}}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::lcm(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification 
failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_midpoint.cu b/test/test_cuda_u64_midpoint.cu new file mode 100644 index 0000000..bba9451 --- /dev/null +++ b/test/test_cuda_u64_midpoint.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::midpoint(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + std::uniform_int_distribution dist2{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context 
ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::midpoint(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_next_multiple_of.cu b/test/test_cuda_u64_next_multiple_of.cu new file mode 100644 index 0000000..fd6f1a7 --- /dev/null +++ b/test/test_cuda_u64_next_multiple_of.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::next_multiple_of(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)() / basis_type{2}}; + std::uniform_int_distribution dist2{basis_type{1}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::next_multiple_of(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if 
(output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_popcount.cu b/test/test_cuda_u64_popcount.cu new file mode 100644 index 0000000..e48df1d --- /dev/null +++ b/test/test_cuda_u64_popcount.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::popcount(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done 
in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::popcount(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_remove_trailing_zeros.cu b/test/test_cuda_u64_remove_trailing_zeros.cu new file mode 100644 index 0000000..0a58876 --- /dev/null +++ b/test/test_cuda_u64_remove_trailing_zeros.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = test_type{boost::safe_numbers::remove_trailing_zeros(in[i]).trimmed_number}; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{1}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + 
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(test_type{boost::safe_numbers::remove_trailing_zeros(input_vector[i]).trimmed_number}); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_rotl.cu b/test/test_cuda_u64_rotl.cu new file mode 100644 index 0000000..a3240b1 --- /dev/null +++ b/test/test_cuda_u64_rotl.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::rotl(in[i], 3); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::rotl(input_vector[i], 3)); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_rotr.cu b/test/test_cuda_u64_rotr.cu new file mode 100644 index 0000000..3cf95de --- /dev/null +++ b/test/test_cuda_u64_rotr.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::rotr(in[i], 3); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); 
+ for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::rotr(input_vector[i], 3)); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_to_be.cu b/test/test_cuda_u64_to_be.cu new file mode 100644 index 0000000..01613a0 --- /dev/null +++ b/test/test_cuda_u64_to_be.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::to_be(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + 
boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::to_be(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u64_to_le.cu b/test/test_cuda_u64_to_le.cu new file mode 100644 index 0000000..80ce98a --- /dev/null +++ b/test/test_cuda_u64_to_le.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::to_le(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{basis_type{0}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::to_le(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_abs_diff.cu b/test/test_cuda_u8_abs_diff.cu new file mode 100644 index 0000000..3d0e736 --- /dev/null +++ b/test/test_cuda_u8_abs_diff.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::abs_diff(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + std::uniform_int_distribution dist2{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist2(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + 
watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::abs_diff(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_bit_ceil.cu b/test/test_cuda_u8_bit_ceil.cu new file mode 100644 index 0000000..f84d37d --- /dev/null +++ b/test/test_cuda_u8_bit_ceil.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::bit_ceil(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)()) / 2U}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::bit_ceil(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_bit_floor.cu b/test/test_cuda_u8_bit_floor.cu new file mode 100644 index 0000000..5ef0598 --- /dev/null +++ b/test/test_cuda_u8_bit_floor.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::bit_floor(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + 
results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::bit_floor(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_bit_width.cu b/test/test_cuda_u8_bit_width.cu new file mode 100644 index 0000000..3ae3bbc --- /dev/null +++ b/test/test_cuda_u8_bit_width.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::bit_width(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << 
" blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::bit_width(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_bitswap.cu b/test/test_cuda_u8_bitswap.cu new file mode 100644 index 0000000..d2d0d56 --- /dev/null +++ b/test/test_cuda_u8_bitswap.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::bitswap(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::bitswap(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_byteswap.cu b/test/test_cuda_u8_byteswap.cu new file mode 100644 index 0000000..9a6e4d5 --- /dev/null +++ b/test/test_cuda_u8_byteswap.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::byteswap(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + 
results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::byteswap(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_charconv.cu b/test/test_cuda_u8_charconv.cu new file mode 100644 index 0000000..daaed8c --- /dev/null +++ b/test/test_cuda_u8_charconv.cu @@ -0,0 +1,90 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char buf[64] {}; + auto tc_result {boost::charconv::to_chars(buf, buf + sizeof(buf), in[i])}; + test_type parsed {}; + boost::charconv::from_chars(buf, tc_result.ptr, parsed); + out[i] = parsed; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int 
threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + char buf[64] {}; + auto tc_result {boost::charconv::to_chars(buf, buf + sizeof(buf), input_vector[i])}; + test_type parsed {}; + boost::charconv::from_chars(buf, tc_result.ptr, parsed); + results.push_back(parsed); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_charconv_all_bases.cu b/test/test_cuda_u8_charconv_all_bases.cu new file mode 100644 index 0000000..481caf1 --- /dev/null +++ b/test/test_cuda_u8_charconv_all_bases.cu @@ -0,0 +1,106 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + int pass_count {0}; + for (int base = 2; base <= 36; ++base) + { + char buf[256] {}; + auto tc_result {boost::charconv::to_chars(buf, buf + sizeof(buf), in[i], base)}; + test_type parsed {}; + boost::charconv::from_chars(buf, tc_result.ptr, parsed, base); + if (parsed == in[i]) + { + ++pass_count; + } + } + out[i] = pass_count; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + int pass_count {0}; + for (int base = 2; base <= 36; ++base) + { + char buf[256] {}; + auto tc_result {boost::charconv::to_chars(buf, buf + 
sizeof(buf), input_vector[i], base)}; + test_type parsed {}; + boost::charconv::from_chars(buf, tc_result.ptr, parsed, base); + if (parsed == input_vector[i]) + { + ++pass_count; + } + } + results.push_back(pass_count); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_countl_one.cu b/test/test_cuda_u8_countl_one.cu new file mode 100644 index 0000000..b5b89fe --- /dev/null +++ b/test/test_cuda_u8_countl_one.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::countl_one(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / 
threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::countl_one(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_countl_zero.cu b/test/test_cuda_u8_countl_zero.cu new file mode 100644 index 0000000..68ba382 --- /dev/null +++ b/test/test_cuda_u8_countl_zero.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::countl_zero(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::countl_zero(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_countr_one.cu b/test/test_cuda_u8_countr_one.cu new file mode 100644 index 0000000..4466c8c --- /dev/null +++ b/test/test_cuda_u8_countr_one.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::countr_one(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + 
results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::countr_one(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_countr_zero.cu b/test/test_cuda_u8_countr_zero.cu new file mode 100644 index 0000000..9902dd0 --- /dev/null +++ b/test/test_cuda_u8_countr_zero.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::countr_zero(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << 
blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::countr_zero(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_div_ceil.cu b/test/test_cuda_u8_div_ceil.cu new file mode 100644 index 0000000..9a4d4f2 --- /dev/null +++ b/test/test_cuda_u8_div_ceil.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::div_ceil(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + std::uniform_int_distribution dist2{1U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist2(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::div_ceil(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if 
(output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_from_be.cu b/test/test_cuda_u8_from_be.cu new file mode 100644 index 0000000..b914fdd --- /dev/null +++ b/test/test_cuda_u8_from_be.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::from_be(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA 
kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::from_be(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_from_le.cu b/test/test_cuda_u8_from_le.cu new file mode 100644 index 0000000..4f669cc --- /dev/null +++ b/test/test_cuda_u8_from_le.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::from_le(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / 
threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::from_le(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_gcd.cu b/test/test_cuda_u8_gcd.cu new file mode 100644 index 0000000..3ab02ee --- /dev/null +++ b/test/test_cuda_u8_gcd.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::gcd(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + std::uniform_int_distribution dist2{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist2(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::gcd(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if 
(output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_has_single_bit.cu b/test/test_cuda_u8_has_single_bit.cu new file mode 100644 index 0000000..4c30350 --- /dev/null +++ b/test/test_cuda_u8_has_single_bit.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = static_cast(boost::safe_numbers::has_single_bit(in[i])); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + 
ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(static_cast(boost::safe_numbers::has_single_bit(input_vector[i]))); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_ilog.cu b/test/test_cuda_u8_ilog.cu new file mode 100644 index 0000000..8fcdce1 --- /dev/null +++ b/test/test_cuda_u8_ilog.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::ilog(in[i], test_type{static_cast(7)}); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{1U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int 
threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::ilog(input_vector[i], test_type{static_cast(7)})); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_ilog10.cu b/test/test_cuda_u8_ilog10.cu new file mode 100644 index 0000000..3bde939 --- /dev/null +++ b/test/test_cuda_u8_ilog10.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::ilog10(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{1U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::ilog10(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_ilog2.cu b/test/test_cuda_u8_ilog2.cu new file mode 100644 index 0000000..adf4094 --- /dev/null +++ b/test/test_cuda_u8_ilog2.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::ilog2(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{1U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + 
w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::ilog2(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_ipow.cu b/test/test_cuda_u8_ipow.cu new file mode 100644 index 0000000..ca643a8 --- /dev/null +++ b/test/test_cuda_u8_ipow.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::ipow(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, 10U}; + std::uniform_int_distribution dist2{0U, 2U}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist2(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + 
threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::ipow(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_is_power_10.cu b/test/test_cuda_u8_is_power_10.cu new file mode 100644 index 0000000..7cee6aa --- /dev/null +++ b/test/test_cuda_u8_is_power_10.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = static_cast(boost::safe_numbers::is_power_10(in[i])); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{1U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(static_cast(boost::safe_numbers::is_power_10(input_vector[i]))); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_is_power_2.cu b/test/test_cuda_u8_is_power_2.cu new file mode 100644 index 0000000..4405f2d --- /dev/null +++ b/test/test_cuda_u8_is_power_2.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = static_cast(boost::safe_numbers::is_power_2(in[i])); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + 
results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(static_cast(boost::safe_numbers::is_power_2(input_vector[i]))); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_isqrt.cu b/test/test_cuda_u8_isqrt.cu new file mode 100644 index 0000000..3c68a88 --- /dev/null +++ b/test/test_cuda_u8_isqrt.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::isqrt(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << 
blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::isqrt(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_lcm.cu b/test/test_cuda_u8_lcm.cu new file mode 100644 index 0000000..636263b --- /dev/null +++ b/test/test_cuda_u8_lcm.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::lcm(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{1U, 10U}; + std::uniform_int_distribution dist2{1U, 10U}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist2(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::lcm(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " 
<< i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_midpoint.cu b/test/test_cuda_u8_midpoint.cu new file mode 100644 index 0000000..37e2a76 --- /dev/null +++ b/test/test_cuda_u8_midpoint.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::midpoint(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + std::uniform_int_distribution dist2{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist2(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + 
boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::midpoint(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_next_multiple_of.cu b/test/test_cuda_u8_next_multiple_of.cu new file mode 100644 index 0000000..d5a95d3 --- /dev/null +++ b/test/test_cuda_u8_next_multiple_of.cu @@ -0,0 +1,85 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in1, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::next_multiple_of(in1[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)()) / 2U}; + std::uniform_int_distribution dist2{1U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist2(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::next_multiple_of(input_vector1[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; 
++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_popcount.cu b/test/test_cuda_u8_popcount.cu new file mode 100644 index 0000000..cd27d91 --- /dev/null +++ b/test/test_cuda_u8_popcount.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, int *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::popcount(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + 
std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::popcount(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_remove_trailing_zeros.cu b/test/test_cuda_u8_remove_trailing_zeros.cu new file mode 100644 index 0000000..f115383 --- /dev/null +++ b/test/test_cuda_u8_remove_trailing_zeros.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = test_type{boost::safe_numbers::remove_trailing_zeros(in[i]).trimmed_number}; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{1U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = 
test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(test_type{boost::safe_numbers::remove_trailing_zeros(input_vector[i]).trimmed_number}); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_rotl.cu b/test/test_cuda_u8_rotl.cu new file mode 100644 index 0000000..a45e622 --- /dev/null +++ b/test/test_cuda_u8_rotl.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::rotl(in[i], 3); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::rotl(input_vector[i], 3)); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_rotr.cu b/test/test_cuda_u8_rotr.cu new file mode 100644 index 0000000..47cfd9e --- /dev/null +++ b/test/test_cuda_u8_rotr.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::rotr(in[i], 3); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + 
w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::rotr(input_vector[i], 3)); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_to_be.cu b/test/test_cuda_u8_to_be.cu new file mode 100644 index 0000000..ae5801c --- /dev/null +++ b/test/test_cuda_u8_to_be.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::to_be(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " 
threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::to_be(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_u8_to_le.cu b/test/test_cuda_u8_to_le.cu new file mode 100644 index 0000000..d51dd61 --- /dev/null +++ b/test/test_cuda_u8_to_le.cu @@ -0,0 +1,82 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::u8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::safe_numbers::to_le(in[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{0U, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::safe_numbers::to_le(input_vector[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +}