diff --git a/datafusion-examples/src/utils/csv_to_parquet.rs b/datafusion-examples/src/utils/csv_to_parquet.rs index 16541b13ae9a9..1fbf2930e9043 100644 --- a/datafusion-examples/src/utils/csv_to_parquet.rs +++ b/datafusion-examples/src/utils/csv_to_parquet.rs @@ -18,9 +18,8 @@ use std::path::{Path, PathBuf}; use datafusion::dataframe::DataFrameWriteOptions; -use datafusion::error::Result; +use datafusion::error::{DataFusionError, Result}; use datafusion::prelude::{CsvReadOptions, SessionContext}; -use datafusion_common::DataFusionError; use tempfile::TempDir; use tokio::fs::create_dir_all; diff --git a/datafusion-examples/src/utils/datasets/mod.rs b/datafusion-examples/src/utils/datasets/mod.rs index 47f946f7d89ee..1857e6af9b559 100644 --- a/datafusion-examples/src/utils/datasets/mod.rs +++ b/datafusion-examples/src/utils/datasets/mod.rs @@ -18,8 +18,7 @@ use std::path::PathBuf; use arrow_schema::SchemaRef; -use datafusion::error::Result; -use datafusion_common::DataFusionError; +use datafusion::error::{DataFusionError, Result}; pub mod cars; pub mod regex; @@ -50,10 +49,11 @@ impl ExampleDataset { } pub fn path_str(&self) -> Result { - self.path().to_str().map(String::from).ok_or_else(|| { + let path = self.path(); + path.to_str().map(String::from).ok_or_else(|| { DataFusionError::Execution(format!( "CSV directory path is not valid UTF-8: {}", - self.path().display() + path.display() )) }) } diff --git a/datafusion-examples/src/utils/example_metadata/discover.rs b/datafusion-examples/src/utils/example_metadata/discover.rs index c5cf3ec1117a2..1ba5f6d29a14e 100644 --- a/datafusion-examples/src/utils/example_metadata/discover.rs +++ b/datafusion-examples/src/utils/example_metadata/discover.rs @@ -20,10 +20,12 @@ //! An example group is defined as a directory containing a `main.rs` file //! under the examples root. This module is intentionally filesystem-focused //! and does not perform any parsing or rendering. +//! Discovery fails if no valid example groups are found. use std::fs; use std::path::{Path, PathBuf}; +use datafusion::common::exec_err; use datafusion::error::Result; /// Discovers all example group directories under the given root. @@ -35,10 +37,15 @@ pub fn discover_example_groups(root: &Path) -> Result> { let entry = entry?; let path = entry.path(); - if path.is_dir() && path.join("main.rs").exists() { + if path.is_dir() && path.join("main.rs").is_file() { groups.push(path); } } + + if groups.is_empty() { + return exec_err!("No example groups found under: {}", root.display()); + } + groups.sort(); Ok(groups) } @@ -47,6 +54,8 @@ pub fn discover_example_groups(root: &Path) -> Result> { mod tests { use super::*; + use crate::utils::example_metadata::test_utils::assert_exec_err_contains; + use std::fs::{self, File}; use tempfile::TempDir; @@ -66,10 +75,29 @@ mod tests { fs::create_dir(&group2)?; let groups = discover_example_groups(root)?; - assert_eq!(groups.len(), 1); assert_eq!(groups[0], group1); + Ok(()) + } + + #[test] + fn discover_example_groups_errors_if_main_rs_is_a_directory() -> Result<()> { + let tmp = TempDir::new()?; + let root = tmp.path(); + let group = root.join("group"); + fs::create_dir(&group)?; + fs::create_dir(group.join("main.rs"))?; + + let err = discover_example_groups(root).unwrap_err(); + assert_exec_err_contains(err, "No example groups found"); + Ok(()) + } + #[test] + fn discover_example_groups_errors_if_none_found() -> Result<()> { + let tmp = TempDir::new()?; + let err = discover_example_groups(tmp.path()).unwrap_err(); + assert_exec_err_contains(err, "No example groups found"); Ok(()) } } diff --git a/datafusion-examples/src/utils/example_metadata/model.rs b/datafusion-examples/src/utils/example_metadata/model.rs index 74b605cde1c3a..11416d141eb74 100644 --- a/datafusion-examples/src/utils/example_metadata/model.rs +++ b/datafusion-examples/src/utils/example_metadata/model.rs @@ -25,7 +25,16 @@ use std::path::Path; use datafusion::error::{DataFusionError, Result}; -use crate::utils::example_metadata::{parse_main_rs_docs, render::ABBREVIATIONS}; +use crate::utils::example_metadata::parse_main_rs_docs; + +/// Well-known abbreviations used to preserve correct capitalization +/// when generating human-readable documentation titles. +const ABBREVIATIONS: &[(&str, &str)] = &[ + ("dataframe", "DataFrame"), + ("io", "IO"), + ("sql", "SQL"), + ("udf", "UDF"), +]; /// A group of related examples (e.g. `builtin_functions`, `udf`). /// diff --git a/datafusion-examples/src/utils/example_metadata/parser.rs b/datafusion-examples/src/utils/example_metadata/parser.rs index 83105e7d40e60..4ead3e5a2ae9f 100644 --- a/datafusion-examples/src/utils/example_metadata/parser.rs +++ b/datafusion-examples/src/utils/example_metadata/parser.rs @@ -21,15 +21,16 @@ //! and their associated metadata (file name and description), enforcing //! a strict ordering and structure to avoid ambiguous documentation. -use std::path::Path; -use std::{collections::HashSet, fs}; +use std::{collections::HashSet, fs, path::Path}; -use datafusion_common::{DataFusionError, Result}; +use datafusion::common::exec_err; +use datafusion::error::Result; use nom::{ - IResult, Parser, + Err, IResult, Parser, bytes::complete::{tag, take_until, take_while}, character::complete::multispace0, combinator::all_consuming, + error::{Error, ErrorKind}, sequence::{delimited, preceded}, }; @@ -77,19 +78,13 @@ fn parse_metadata_line(input: &str) -> IResult<&str, (&str, &str)> { let content = payload .strip_prefix("(") .and_then(|s| s.strip_suffix(")")) - .ok_or_else(|| { - nom::Err::Error(nom::error::Error::new(payload, nom::error::ErrorKind::Tag)) - })?; + .ok_or_else(|| Err::Error(Error::new(payload, ErrorKind::Tag)))?; let (file, desc) = content .strip_prefix("file:") - .ok_or_else(|| { - nom::Err::Error(nom::error::Error::new(payload, nom::error::ErrorKind::Tag)) - })? + .ok_or_else(|| Err::Error(Error::new(payload, ErrorKind::Tag)))? .split_once(", desc:") - .ok_or_else(|| { - nom::Err::Error(nom::error::Error::new(payload, nom::error::ErrorKind::Tag)) - })?; + .ok_or_else(|| Err::Error(Error::new(payload, ErrorKind::Tag)))?; Ok((rest, (file.trim(), desc.trim()))) } @@ -119,18 +114,16 @@ pub fn parse_main_rs_docs(path: &Path) -> Result> { let subcommand = match state { ParserState::SeenSubcommand(s) => s, ParserState::Idle => { - return Err(DataFusionError::Execution(format!( + return exec_err!( "Metadata without preceding subcommand at {}:{}", path.display(), line_no + 1 - ))); + ); } }; if !seen_subcommands.insert(subcommand) { - return Err(DataFusionError::Execution(format!( - "Duplicate metadata for subcommand `{subcommand}`" - ))); + return exec_err!("Duplicate metadata for subcommand `{subcommand}`"); } entries.push(ExampleEntry { diff --git a/datafusion-examples/src/utils/example_metadata/render.rs b/datafusion-examples/src/utils/example_metadata/render.rs index 1a4df3a400b11..a4ea620e78352 100644 --- a/datafusion-examples/src/utils/example_metadata/render.rs +++ b/datafusion-examples/src/utils/example_metadata/render.rs @@ -85,15 +85,6 @@ cargo run --example dataframe -- dataframe ``` "#; -/// Well-known abbreviations used to preserve correct capitalization -/// when generating human-readable documentation titles. -pub const ABBREVIATIONS: &[(&str, &str)] = &[ - ("dataframe", "DataFrame"), - ("io", "IO"), - ("sql", "SQL"), - ("udf", "UDF"), -]; - /// Generates Markdown documentation for DataFusion examples. /// /// If `group` is `None`, documentation is generated for all example groups.