From 554b80432a36337bb5769e60022a5c2c4a3bbc47 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Mon, 15 Jun 2026 11:39:50 +0200 Subject: [PATCH] refactor: Simplify heap size estimation for arrays This introduces a macro for the redundant heap size estimation for arrays --- datafusion/common/src/heap_size.rs | 121 +++++++++++++++++++---------- 1 file changed, 80 insertions(+), 41 deletions(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index 802f9d3883222..869946d82414f 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -326,47 +326,6 @@ impl DFHeapSize for Fields { } } -impl DFHeapSize for StructArray { - fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { - self.get_array_memory_size() - } -} - -impl DFHeapSize for LargeListArray { - fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { - self.get_array_memory_size() - } -} - -impl DFHeapSize for LargeListViewArray { - fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { - self.get_array_memory_size() - } -} - -impl DFHeapSize for ListArray { - fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { - self.get_array_memory_size() - } -} - -impl DFHeapSize for ListViewArray { - fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { - self.get_array_memory_size() - } -} - -impl DFHeapSize for FixedSizeListArray { - fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { - self.get_array_memory_size() - } -} -impl DFHeapSize for MapArray { - fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { - self.get_array_memory_size() - } -} - impl DFHeapSize for Box { fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { size_of::() + self.as_ref().heap_size(ctx) @@ -469,6 +428,29 @@ impl_zero_heap_size!( DateTime, ); +/// Implement [`DFHeapSize`] for Arrow array types. +macro_rules! impl_array_heap_size { + ($($t:ty),+ $(,)?) 
=> { + $( + impl DFHeapSize for $t { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { + self.get_array_memory_size() + } + } + )+ + }; +} + +impl_array_heap_size!( + StructArray, + LargeListArray, + LargeListViewArray, + ListArray, + ListViewArray, + FixedSizeListArray, + MapArray, +); + #[cfg(test)] mod tests { use super::*; @@ -696,4 +678,61 @@ mod tests { let field = Field::new("temperature", DataType::Float64, true); assert!(size(&field) > 0); } + + #[test] + fn test_list_array() { + use arrow::array::types::Int32Type; + + let array = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + Some(vec![Some(4)]), + ]); + assert_eq!(size(&array), array.get_array_memory_size()); + assert!(size(&array) > 0); + + let large = + LargeListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![ + Some(1), + Some(2), + ])]); + assert_eq!(size(&large), large.get_array_memory_size()); + assert!(size(&large) > 0); + } + + #[test] + fn test_struct_array() { + use arrow::array::Int32Array; + + let array = StructArray::from(vec![( + Arc::new(Field::new("a", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![1, 2, 3])) as _, + )]); + assert_eq!(size(&array), array.get_array_memory_size()); + assert!(size(&array) > 0); + } + + #[test] + fn test_fixed_size_list_array() { + use arrow::array::Int32Array; + + let values = Arc::new(Int32Array::from(vec![1, 2, 3, 4])); + let field = Arc::new(Field::new("item", DataType::Int32, true)); + let array = FixedSizeListArray::new(field, 2, values, None); + assert_eq!(size(&array), array.get_array_memory_size()); + assert!(size(&array) > 0); + } + + #[test] + fn test_map_array() { + use arrow::array::{Int32Builder, MapBuilder, StringBuilder}; + + let mut builder = + MapBuilder::new(None, StringBuilder::new(), Int32Builder::new()); + builder.keys().append_value("key"); + builder.values().append_value(1); + builder.append(true).unwrap(); + let array = builder.finish(); + assert_eq!(size(&array), 
array.get_array_memory_size()); + assert!(size(&array) > 0); + } }