Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ sha2 = "0.11.0"
simdutf8 = "0.1.5"
similar = "3.0.0"
sketches-ddsketch = "0.4.0"
smallvec = "1.15.1"
smol = "2.0.2"
static_assertions = "1.1"
strum = "0.28"
Expand Down
1 change: 1 addition & 0 deletions encodings/sequence/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ version = { workspace = true }
[dependencies]
num-traits = { workspace = true }
prost = { workspace = true }
smallvec = { workspace = true }
vortex-array = { workspace = true }
vortex-buffer = { workspace = true }
vortex-error = { workspace = true }
Expand Down
3 changes: 2 additions & 1 deletion encodings/sequence/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use std::hash::Hasher;

use num_traits::cast::FromPrimitive;
use prost::Message;
use smallvec::smallvec;
use vortex_array::Array;
use vortex_array::ArrayEq;
use vortex_array::ArrayHash;
Expand Down Expand Up @@ -383,7 +384,7 @@ impl Sequence {

// SAFETY: we don't have duplicate stats.
unsafe {
StatsSet::new_unchecked(vec![
StatsSet::new_unchecked(smallvec![
(Stat::IsSorted, StatPrecision::Exact(is_sorted.into())),
(
Stat::IsStrictSorted,
Expand Down
1 change: 1 addition & 0 deletions vortex-array/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ rstest_reuse = { workspace = true, optional = true }
rustc-hash = { workspace = true }
serde = { workspace = true, optional = true, features = ["derive", "rc"] }
simdutf8 = { workspace = true }
smallvec = { workspace = true }
static_assertions = { workspace = true }
tabled = { workspace = true, optional = true, default-features = false, features = [
"std",
Expand Down
4 changes: 3 additions & 1 deletion vortex-array/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -19344,7 +19344,7 @@ pub fn vortex_array::stats::StatsSet::as_mut_typed_ref<'a, 'b>(&'a mut self, &'b

pub fn vortex_array::stats::StatsSet::as_typed_ref<'a, 'b>(&'a self, &'b vortex_array::dtype::DType) -> vortex_array::stats::TypedStatsSetRef<'a, 'b>

pub unsafe fn vortex_array::stats::StatsSet::new_unchecked(alloc::vec::Vec<(vortex_array::expr::stats::Stat, vortex_array::expr::stats::Precision<vortex_array::scalar::ScalarValue>)>) -> Self
pub unsafe fn vortex_array::stats::StatsSet::new_unchecked(smallvec::SmallVec<vortex_array::stats::StatsArray>) -> Self

pub fn vortex_array::stats::StatsSet::of(vortex_array::expr::stats::Stat, vortex_array::expr::stats::Precision<vortex_array::scalar::ScalarValue>) -> Self

Expand Down Expand Up @@ -19508,6 +19508,8 @@ pub fn vortex_array::stats::as_stat_bitset_bytes(&[vortex_array::expr::stats::St

pub fn vortex_array::stats::stats_from_bitset_bytes(&[u8]) -> alloc::vec::Vec<vortex_array::expr::stats::Stat>

pub type vortex_array::stats::StatsArray = [(vortex_array::expr::stats::Stat, vortex_array::expr::stats::Precision<vortex_array::scalar::ScalarValue>); 4]

pub mod vortex_array::stream

pub struct vortex_array::stream::ArrayStreamAdapter<S>
Expand Down
3 changes: 2 additions & 1 deletion vortex-array/src/arrays/dict/take.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use smallvec::SmallVec;
use vortex_error::VortexResult;

use super::Dict;
Expand Down Expand Up @@ -164,7 +165,7 @@ pub(crate) fn propagate_take_stats(
.and_then(|v| v.map(|s| s.into_value()).into_inexact().transpose())
.map(|sv| (stat, sv))
})
.collect::<Vec<_>>();
.collect::<SmallVec<_>>();
st.combine_sets(
&(unsafe { StatsSet::new_unchecked(inexact_min_max) }).as_typed_ref(source.dtype()),
)
Expand Down
41 changes: 18 additions & 23 deletions vortex-array/src/stats/stats_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@

use std::fmt::Debug;

use enum_iterator::Sequence;
use enum_iterator::all;
use num_traits::CheckedAdd;
use smallvec::SmallVec;
use smallvec::smallvec;
use vortex_error::VortexError;
use vortex_error::VortexExpect;
use vortex_error::VortexResult;
Expand All @@ -31,9 +32,12 @@ use crate::expr::stats::UncompressedSizeInBytes;
use crate::scalar::Scalar;
use crate::scalar::ScalarValue;

/// Inline backing array for the [`SmallVec`] held by [`StatsSet`].
///
/// Each entry pairs a [`Stat`] with its [`Precision`]-wrapped [`ScalarValue`]. The array
/// length (4) is the number of entries the `SmallVec` keeps inline; per the `smallvec`
/// documentation, growing past the inline capacity moves the contents to the heap.
pub type StatsArray = [(Stat, Precision<ScalarValue>); 4];

#[derive(Default, Debug, Clone)]
pub struct StatsSet {
values: Vec<(Stat, Precision<ScalarValue>)>,
values: SmallVec<StatsArray>,
}

impl StatsSet {
Expand All @@ -42,20 +46,14 @@ impl StatsSet {
/// # Safety
///
/// This method will not panic or trigger UB, but it stores `values` as-is: the caller must
/// ensure that no [`Stat`] appears more than once, otherwise duplicate stats will be stored.
pub unsafe fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
pub unsafe fn new_unchecked(values: SmallVec<StatsArray>) -> Self {
Self { values }
}

/// Create a [`StatsSet`] from a single stat and its value.
pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
// SAFETY: No duplicate stats will be set here.
unsafe { Self::new_unchecked(vec![(stat, value)]) }
}

fn reserve_full_capacity(&mut self) {
if self.values.capacity() < Stat::CARDINALITY {
self.values
.reserve_exact(Stat::CARDINALITY - self.values.capacity());
Self {
values: smallvec![(stat, value)],
}
}

Expand All @@ -80,8 +78,6 @@ impl StatsSet {
impl StatsSet {
/// Set the stat `stat` to `value`.
pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
self.reserve_full_capacity();

if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
*existing = (stat, value);
} else {
Expand Down Expand Up @@ -154,7 +150,7 @@ impl StatsSet {
/// Owned iterator over the stats.
///
/// See [IntoIterator].
pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
pub struct StatsSetIntoIter(smallvec::IntoIter<StatsArray>);

impl Iterator for StatsSetIntoIter {
type Item = (Stat, Precision<ScalarValue>);
Expand All @@ -176,10 +172,10 @@ impl IntoIterator for StatsSet {
impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
let iter = iter.into_iter();
let mut values = Vec::default();
values.reserve_exact(Stat::CARDINALITY);

let mut this = Self { values };
let mut this = Self {
values: SmallVec::new(),
};
this.extend(iter);
this
}
Expand All @@ -188,10 +184,8 @@ impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
#[inline]
fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
let iter = iter.into_iter();
self.reserve_full_capacity();

iter.for_each(|(stat, value)| self.set(stat, value));
iter.into_iter()
.for_each(|(stat, value)| self.set(stat, value));
}
}

Expand Down Expand Up @@ -574,6 +568,7 @@ impl MutTypedStatsSetRef<'_, '_> {
mod test {
use enum_iterator::all;
use itertools::Itertools;
use smallvec::smallvec;

use crate::LEGACY_SESSION;
use crate::VortexSessionExecute;
Expand All @@ -593,7 +588,7 @@ mod test {
fn test_iter() {
// SAFETY: No duplicate stats.
let set = unsafe {
StatsSet::new_unchecked(vec![
StatsSet::new_unchecked(smallvec![
(Stat::Max, Precision::exact(100)),
(Stat::Min, Precision::exact(42)),
])
Expand Down Expand Up @@ -621,7 +616,7 @@ mod test {
fn into_iter() {
// SAFETY: No duplicate stats.
let mut set = unsafe {
StatsSet::new_unchecked(vec![
StatsSet::new_unchecked(smallvec![
(Stat::Max, Precision::exact(100)),
(Stat::Min, Precision::exact(42)),
])
Expand Down
Loading