Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion datafusion-examples/examples/data_io/parquet_encrypted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ pub async fn parquet_encrypted() -> datafusion::common::Result<()> {
// Read encrypted parquet back as a DataFrame using matching decryption config
let ctx: SessionContext = SessionContext::new();
let read_options =
ParquetReadOptions::default().file_decryption_properties((&decrypt).into());
ParquetReadOptions::default().file_decryption_properties((&decrypt).try_into()?);

let encrypted_parquet_df = ctx
.read_parquet(tempfile.to_str().unwrap(), read_options)
Expand Down
61 changes: 53 additions & 8 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3024,8 +3024,18 @@ impl From<ConfigFileDecryptionProperties> for FileDecryptionProperties {
}

#[cfg(feature = "parquet_encryption")]
impl From<&Arc<FileDecryptionProperties>> for ConfigFileDecryptionProperties {
fn from(f: &Arc<FileDecryptionProperties>) -> Self {
impl TryFrom<&Arc<FileDecryptionProperties>> for ConfigFileDecryptionProperties {
Comment thread
adamreeve marked this conversation as resolved.
type Error = DataFusionError;

fn try_from(f: &Arc<FileDecryptionProperties>) -> Result<Self> {
let footer_key = f.footer_key(None).map_err(|e| {
Comment thread
adamreeve marked this conversation as resolved.
DataFusionError::Configuration(format!(
"Could not retrieve footer key from FileDecryptionProperties. \
Note that conversion to ConfigFileDecryptionProperties is not supported \
when using a key retriever: {e}"
))
})?;

let (column_names_vec, column_keys_vec) = f.column_keys();
let mut column_decryption_properties: HashMap<
String,
Expand All @@ -3039,14 +3049,12 @@ impl From<&Arc<FileDecryptionProperties>> for ConfigFileDecryptionProperties {
}

let aad_prefix = f.aad_prefix().cloned().unwrap_or_default();
ConfigFileDecryptionProperties {
footer_key_as_hex: hex::encode(
f.footer_key(None).unwrap_or_default().as_ref(),
),
Ok(ConfigFileDecryptionProperties {
footer_key_as_hex: hex::encode(footer_key.as_ref()),
column_decryption_properties,
aad_prefix_as_hex: hex::encode(aad_prefix),
footer_signature_verification: f.check_plaintext_footer_integrity(),
}
})
}
}

Expand Down Expand Up @@ -3519,7 +3527,8 @@ mod tests {
Arc::new(FileEncryptionProperties::from(config_encrypt.clone()));
assert_eq!(file_encryption_properties, encryption_properties_built);

let config_decrypt = ConfigFileDecryptionProperties::from(&decryption_properties);
let config_decrypt =
ConfigFileDecryptionProperties::try_from(&decryption_properties).unwrap();
let decryption_properties_built =
Arc::new(FileDecryptionProperties::from(config_decrypt.clone()));
assert_eq!(decryption_properties, decryption_properties_built);
Expand Down Expand Up @@ -3637,6 +3646,42 @@ mod tests {
assert_eq!(factory_options.get("key2"), Some(&"value 2".to_string()));
}

#[cfg(feature = "parquet_encryption")]
struct ParquetEncryptionKeyRetriever {}

#[cfg(feature = "parquet_encryption")]
impl parquet::encryption::decrypt::KeyRetriever for ParquetEncryptionKeyRetriever {
fn retrieve_key(&self, key_metadata: &[u8]) -> parquet::errors::Result<Vec<u8>> {
if !key_metadata.is_empty() {
Ok(b"1234567890123450".to_vec())
} else {
Err(parquet::errors::ParquetError::General(
"Key metadata not provided".to_string(),
))
}
}
}

#[cfg(feature = "parquet_encryption")]
#[test]
fn conversion_from_key_retriever_to_config_file_decryption_properties() {
use crate::Result;
use crate::config::ConfigFileDecryptionProperties;
use crate::encryption::FileDecryptionProperties;

let retriever = std::sync::Arc::new(ParquetEncryptionKeyRetriever {});
let decryption_properties =
FileDecryptionProperties::with_key_retriever(retriever)
.build()
.unwrap();
let config_file_decryption_properties: Result<ConfigFileDecryptionProperties> =
(&decryption_properties).try_into();
assert!(config_file_decryption_properties.is_err());
let err = config_file_decryption_properties.unwrap_err().to_string();
assert!(err.contains("key retriever"));
assert!(err.contains("Key metadata not provided"));
}

#[cfg(feature = "parquet")]
#[test]
fn parquet_table_options_config_entry() {
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/src/dataframe/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,8 @@ mod tests {

// Read encrypted parquet
let ctx: SessionContext = SessionContext::new();
let read_options =
ParquetReadOptions::default().file_decryption_properties((&decrypt).into());
let read_options = ParquetReadOptions::default()
.file_decryption_properties((&decrypt).try_into()?);

ctx.register_parquet("roundtrip_parquet", &tempfile_str, read_options.clone())
.await?;
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/tests/parquet/encryption.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ async fn round_trip_encryption() {

// Read encrypted parquet
let ctx: SessionContext = SessionContext::new();
let options =
ParquetReadOptions::default().file_decryption_properties((&decrypt).into());
let options = ParquetReadOptions::default()
.file_decryption_properties((&decrypt).try_into().unwrap());

let encrypted_batches = read_parquet_test_data(
tempfile.into_os_string().into_string().unwrap(),
Expand Down
37 changes: 37 additions & 0 deletions docs/source/library-user-guide/upgrading/54.0.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -450,3 +450,40 @@ clusters (e.g., ZWJ emoji sequences). For ASCII and most common Unicode text,
behavior is unchanged.

[#17861]: https://github.com/apache/datafusion/pull/17861

### Conversion from `FileDecryptionProperties` to `ConfigFileDecryptionProperties` is now fallible

Previously, `datafusion_common::config::ConfigFileDecryptionProperties`
Comment thread
adamreeve marked this conversation as resolved.
implemented `From<&Arc<parquet::encryption::decrypt::FileDecryptionProperties>>`.
If an error was encountered when retrieving the footer key without providing key metadata,
the error would be ignored and an empty footer key set in the result.
This could lead to obscure errors later.

`ConfigFileDecryptionProperties` now instead implements `TryFrom<&Arc<FileDecryptionProperties>>`,
and errors retrieving the footer key will be propagated up.

**Migration guide:**

Replace calls to `ConfigFileDecryptionProperties::from` with `ConfigFileDecryptionProperties::try_from`,
and affected calls to `into` with `try_into`, with appropriate error handling added.

**Before:**

```rust,ignore
let config_decryption_properties: ConfigFileDecryptionProperties = (&decryption_properties).into();
// or
let config_decryption_properties = ConfigFileDecryptionProperties::from(&decryption_properties);
```

(where `decryption_properties` is an `Arc<FileDecryptionProperties>`)

**After:**

```rust,ignore
let config_decryption_properties: ConfigFileDecryptionProperties = (&decryption_properties).try_into()?;
// or
let config_decryption_properties = ConfigFileDecryptionProperties::try_from(&decryption_properties)?;
```

See [#21602](https://github.com/apache/datafusion/issues/21602) and
[PR #21603](https://github.com/apache/datafusion/pull/21603) for details.
Loading