-
Notifications
You must be signed in to change notification settings - Fork 127
gpu compatible write strategy, move compact strategy to use btrblocks with zstd and pco #6322
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
09d6906
143b084
3b1e9af
73c05cf
735233c
b1eb106
f4f3811
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -51,9 +51,21 @@ pub struct BtrBlocksCompressorBuilder { | |
| impl Default for BtrBlocksCompressorBuilder { | ||
| fn default() -> Self { | ||
| Self { | ||
| int_schemes: ALL_INT_SCHEMES.iter().copied().collect(), | ||
| float_schemes: ALL_FLOAT_SCHEMES.iter().copied().collect(), | ||
| string_schemes: ALL_STRING_SCHEMES.iter().copied().collect(), | ||
| int_schemes: ALL_INT_SCHEMES | ||
| .iter() | ||
| .copied() | ||
| .filter(|s| s.code() != IntCode::Pco) | ||
|
Comment on lines
+54
to
+57
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Shall we have an ALL_DEFAULT_SCHEMES constant next to the all-schemes constants?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's just that people will forget to omit other schemes. |
||
| .collect(), | ||
| float_schemes: ALL_FLOAT_SCHEMES | ||
| .iter() | ||
| .copied() | ||
| .filter(|s| s.code() != FloatCode::Pco) | ||
| .collect(), | ||
| string_schemes: ALL_STRING_SCHEMES | ||
| .iter() | ||
| .copied() | ||
| .filter(|s| s.code() != StringCode::Zstd) | ||
| .collect(), | ||
| } | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -62,6 +62,8 @@ pub const ALL_INT_SCHEMES: &[&dyn IntegerScheme] = &[ | |
| &RunEndScheme, | ||
| &SequenceScheme, | ||
| &RLE_INTEGER_SCHEME, | ||
| #[cfg(feature = "pco")] | ||
| &PcoScheme, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This must not be in the default. |
||
| ]; | ||
|
|
||
| /// [`Compressor`] for signed and unsigned integers. | ||
|
|
@@ -156,6 +158,8 @@ pub enum IntCode { | |
| Sequence, | ||
| /// RLE encoding - generic run-length encoding. | ||
| Rle, | ||
| /// Pco (pcodec) compression for integers. | ||
| Pco, | ||
| } | ||
|
|
||
| #[derive(Debug, Copy, Clone, PartialEq, Eq)] | ||
|
|
@@ -188,6 +192,11 @@ pub struct RunEndScheme; | |
| #[derive(Debug, Copy, Clone, PartialEq, Eq)] | ||
| pub struct SequenceScheme; | ||
|
|
||
| /// Pco (pcodec) compression for integers. | ||
| #[cfg(feature = "pco")] | ||
| #[derive(Debug, Copy, Clone, PartialEq, Eq)] | ||
| pub struct PcoScheme; | ||
|
|
||
| /// Threshold for the average run length in an array before we consider run-end encoding. | ||
| const RUN_END_THRESHOLD: u32 = 4; | ||
|
|
||
|
|
@@ -818,6 +827,49 @@ impl Scheme for SequenceScheme { | |
| } | ||
| } | ||
|
|
||
| #[cfg(feature = "pco")] | ||
| impl Scheme for PcoScheme { | ||
| type StatsType = IntegerStats; | ||
| type CodeType = IntCode; | ||
|
|
||
| fn code(&self) -> IntCode { | ||
| IntCode::Pco | ||
| } | ||
|
|
||
| fn expected_compression_ratio( | ||
| &self, | ||
| compressor: &BtrBlocksCompressor, | ||
| stats: &Self::StatsType, | ||
| ctx: CompressorContext, | ||
| excludes: &[IntCode], | ||
| ) -> VortexResult<f64> { | ||
| // Pco does not support I8 or U8. | ||
| if matches!( | ||
| stats.src.ptype(), | ||
| vortex_dtype::PType::I8 | vortex_dtype::PType::U8 | ||
| ) { | ||
| return Ok(0.0); | ||
| } | ||
|
|
||
| self.estimate_compression_ratio_with_sampling(compressor, stats, ctx, excludes) | ||
| } | ||
|
|
||
| fn compress( | ||
| &self, | ||
| _compressor: &BtrBlocksCompressor, | ||
| stats: &Self::StatsType, | ||
| _ctx: CompressorContext, | ||
| _excludes: &[IntCode], | ||
| ) -> VortexResult<ArrayRef> { | ||
| Ok(vortex_pco::PcoArray::from_primitive( | ||
| stats.source(), | ||
| pco::DEFAULT_COMPRESSION_LEVEL, | ||
| 8192, | ||
| )? | ||
| .into_array()) | ||
| } | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use std::iter; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here is where we currently exclude pco and zstd from the default.