Skip to content
Open
494 changes: 460 additions & 34 deletions include/cuco/detail/open_addressing/open_addressing_ref_impl.cuh

Large diffs are not rendered by default.

79 changes: 79 additions & 0 deletions include/cuco/detail/probing_scheme/probing_scheme_impl.inl
Original file line number Diff line number Diff line change
Expand Up @@ -208,4 +208,83 @@ double_hashing<CGSize, Hash1, Hash2>::hash_function() const noexcept
return {hash1_, hash2_};
}

namespace detail {

/**
* @brief Robin Hood inverse primitive for the linear probing sequence.
*
* @note Recovers a resident's probe distance ("age") from the slot it occupies: how many probing
* steps the resident sits from its own home bucket. For the linear sequence this is a single
* subtract — `(slot_base - resident_home) / stride mod num_buckets`. This is the linear-only
* overload; a `double_hashing` variant would add its own overload here (a modular inverse of the
* resident's per-key step, or a stored age), which is the single place that change lands.
*
* @tparam BucketSize Size of the bucket
* @tparam CGSize Size of CUDA Cooperative Groups
* @tparam Hash Unary callable type
* @tparam ProbeKey Type of probing key
* @tparam Extent Type of extent
*
* @param scheme The underlying linear probing scheme (supplies the hash function)
* @param resident_key The key currently residing in the slot
* @param slot_index The slot index at which `resident_key` resides
* @param upper_bound Upper bound of the iteration
* @return The resident's probe distance, in probing steps
*/
template <int32_t BucketSize, int32_t CGSize, typename Hash, typename ProbeKey, typename Extent>
[[nodiscard]] __host__ __device__ constexpr typename Extent::value_type probe_distance(
linear_probing<CGSize, Hash> const& scheme,
ProbeKey resident_key,
typename Extent::value_type slot_index,
Extent upper_bound) noexcept
{
using size_type = typename Extent::value_type;
size_type constexpr stride = CGSize * BucketSize;
auto const bound = static_cast<size_type>(upper_bound);
auto const hash = scheme.hash_function();

// Home bucket base of the resident, using the same alignment as `make_iterator`.
size_type const resident_home =
cuco::detail::sanitize_hash<size_type>(hash(resident_key)) % (bound / stride) * stride;

// Bucket-strided base of the slot the resident currently occupies. The per-lane `thread_rank`
// offset (which is < stride) is stripped by the floor division so that the distance is measured
// in whole probing steps, consistent with the forward sequence.
size_type const slot_base = (slot_index / stride) * stride;

// (slot_base - resident_home) mod capacity, expressed in probing steps.
return static_cast<size_type>((slot_base + bound - resident_home) % bound) / stride;
}

} // namespace detail

template <typename Underlying>
__host__ __device__ constexpr robin_hood_probing<Underlying>::robin_hood_probing(
Underlying const& probing)
: Underlying{probing}
{
}

template <typename Underlying>
template <typename NewHash>
__host__ __device__ constexpr auto robin_hood_probing<Underlying>::rebind_hash_function(
NewHash const& hash) const noexcept
{
auto const inner = static_cast<Underlying const&>(*this).rebind_hash_function(hash);
return robin_hood_probing<cuda::std::decay_t<decltype(inner)>>{inner};
}

template <typename Underlying>
template <int32_t BucketSize, typename ProbeKey, typename Extent>
__host__ __device__ constexpr typename Extent::value_type
robin_hood_probing<Underlying>::probe_distance(ProbeKey resident_key,
typename Extent::value_type slot_index,
Extent upper_bound) const noexcept
{
// Delegate to the per-underlying-scheme inverse overload. Only `linear_probing` provides one
// today; wrapping a scheme without a matching overload is a compile-time error.
return cuco::detail::probe_distance<BucketSize>(
static_cast<Underlying const&>(*this), resident_key, slot_index, upper_bound);
}

} // namespace cuco
Loading