diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index c546bb583e3..b2d0bb65730 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -73,6 +73,7 @@ use crate::client_common::Prompt; use crate::client_common::ResponseEvent; use crate::compact::collect_user_messages; use crate::config::Config; +use crate::config::GhostSnapshotConfig; use crate::config::types::ShellEnvironmentPolicy; use crate::context_manager::ContextManager; use crate::environment_context::EnvironmentContext; @@ -361,6 +362,7 @@ pub(crate) struct TurnContext { pub(crate) sandbox_policy: SandboxPolicy, pub(crate) shell_environment_policy: ShellEnvironmentPolicy, pub(crate) tools_config: ToolsConfig, + pub(crate) ghost_snapshot: GhostSnapshotConfig, pub(crate) final_output_json_schema: Option, pub(crate) codex_linux_sandbox_exe: Option, pub(crate) tool_call_gate: Arc, @@ -522,6 +524,7 @@ impl Session { sandbox_policy: session_configuration.sandbox_policy.clone(), shell_environment_policy: per_turn_config.shell_environment_policy.clone(), tools_config, + ghost_snapshot: per_turn_config.ghost_snapshot.clone(), final_output_json_schema: None, codex_linux_sandbox_exe: per_turn_config.codex_linux_sandbox_exe.clone(), tool_call_gate: Arc::new(ReadinessFlag::new()), @@ -2015,6 +2018,7 @@ async fn spawn_review_thread( sub_id: sub_id.to_string(), client, tools_config, + ghost_snapshot: parent_turn_context.ghost_snapshot.clone(), developer_instructions: None, user_instructions: None, base_instructions: Some(base_instructions.clone()), diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index 4c1a073c577..25325ad1487 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -63,6 +63,8 @@ pub mod types; const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5.1-codex-max"; +pub use codex_git::GhostSnapshotConfig; + /// Maximum number of bytes of the documentation that will be embedded. 
Larger /// files are *silently truncated* to this size so we do not take up too much of /// the context window. @@ -265,6 +267,9 @@ pub struct Config { /// https://github.com/modelcontextprotocol/rust-sdk pub use_experimental_use_rmcp_client: bool, + /// Settings for ghost snapshots (used for undo). + pub ghost_snapshot: GhostSnapshotConfig, + /// Centralized feature flags; source of truth for feature gating. pub features: Features, @@ -716,6 +721,10 @@ pub struct ConfigToml { #[serde(default)] pub features: Option, + /// Settings for ghost snapshots (used for undo). + #[serde(default)] + pub ghost_snapshot: Option, + /// When `true`, checks for Codex updates on startup and surfaces update prompts. /// Set to `false` only if your Codex updates are centrally managed. /// Defaults to `true`. @@ -805,6 +814,17 @@ impl From for Tools { } } +#[derive(Deserialize, Debug, Clone, Default, PartialEq, Eq)] +pub struct GhostSnapshotToml { + /// Exclude untracked files larger than this many bytes from ghost snapshots (`0` disables). + #[serde(alias = "ignore_untracked_files_over_bytes")] + pub ignore_large_untracked_files: Option, + /// Ignore untracked directories that contain this many files or more (`0` disables). + /// Ignored directories are still reported in a warning.
+ #[serde(alias = "large_untracked_dir_warning_threshold")] + pub ignore_large_untracked_dirs: Option, +} + #[derive(Debug, PartialEq, Eq)] pub struct SandboxPolicyResolution { pub policy: SandboxPolicy, @@ -1103,6 +1123,26 @@ impl Config { let history = cfg.history.unwrap_or_default(); + let ghost_snapshot = { + let mut config = GhostSnapshotConfig::default(); + if let Some(ghost_snapshot) = cfg.ghost_snapshot.as_ref() + && let Some(ignore_over_bytes) = ghost_snapshot.ignore_large_untracked_files + { + config.ignore_large_untracked_files = if ignore_over_bytes > 0 { + Some(ignore_over_bytes) + } else { + None + }; + } + if let Some(ghost_snapshot) = cfg.ghost_snapshot.as_ref() + && let Some(threshold) = ghost_snapshot.ignore_large_untracked_dirs + { + config.ignore_large_untracked_dirs = + if threshold > 0 { Some(threshold) } else { None }; + } + config + }; + let include_apply_patch_tool_flag = features.enabled(Feature::ApplyPatchFreeform); let tools_web_search_request = features.enabled(Feature::WebSearchRequest); let use_experimental_unified_exec_tool = features.enabled(Feature::UnifiedExec); @@ -1235,6 +1275,7 @@ impl Config { tools_web_search_request, use_experimental_unified_exec_tool, use_experimental_use_rmcp_client, + ghost_snapshot, features, active_profile: active_profile_name, active_project, @@ -2986,6 +3027,7 @@ model_verbosity = "high" tools_web_search_request: false, use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, + ghost_snapshot: GhostSnapshotConfig::default(), features: Features::with_defaults(), active_profile: Some("o3".to_string()), active_project: ProjectConfig { trust_level: None }, @@ -3060,6 +3102,7 @@ model_verbosity = "high" tools_web_search_request: false, use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, + ghost_snapshot: GhostSnapshotConfig::default(), features: Features::with_defaults(), active_profile: Some("gpt3".to_string()), active_project: ProjectConfig { 
trust_level: None }, @@ -3149,6 +3192,7 @@ model_verbosity = "high" tools_web_search_request: false, use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, + ghost_snapshot: GhostSnapshotConfig::default(), features: Features::with_defaults(), active_profile: Some("zdr".to_string()), active_project: ProjectConfig { trust_level: None }, @@ -3224,6 +3268,7 @@ model_verbosity = "high" tools_web_search_request: false, use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, + ghost_snapshot: GhostSnapshotConfig::default(), features: Features::with_defaults(), active_profile: Some("gpt5".to_string()), active_project: ProjectConfig { trust_level: None }, diff --git a/codex-rs/core/src/tasks/ghost_snapshot.rs b/codex-rs/core/src/tasks/ghost_snapshot.rs index 7e84c330f6d..af694464d42 100644 --- a/codex-rs/core/src/tasks/ghost_snapshot.rs +++ b/codex-rs/core/src/tasks/ghost_snapshot.rs @@ -73,30 +73,43 @@ impl SessionTask for GhostSnapshotTask { _ = cancellation_token.cancelled() => true, _ = async { let repo_path = ctx_for_task.cwd.clone(); + let ghost_snapshot = ctx_for_task.ghost_snapshot.clone(); + let ignore_large_untracked_dirs = ghost_snapshot.ignore_large_untracked_dirs; // First, compute a snapshot report so we can warn about // large untracked directories before running the heavier // snapshot logic. 
if let Ok(Ok(report)) = tokio::task::spawn_blocking({ let repo_path = repo_path.clone(); + let ghost_snapshot = ghost_snapshot.clone(); move || { - let options = CreateGhostCommitOptions::new(&repo_path); + let options = + CreateGhostCommitOptions::new(&repo_path).ghost_snapshot(ghost_snapshot); capture_ghost_snapshot_report(&options) } }) .await - && let Some(message) = format_large_untracked_warning(&report) { - session - .session - .send_event( - &ctx_for_task, - EventMsg::Warning(WarningEvent { message }), - ) - .await; - } + { + for message in + format_snapshot_warnings( + ghost_snapshot.ignore_large_untracked_files, + ignore_large_untracked_dirs, + &report, + ) + { + session + .session + .send_event( + &ctx_for_task, + EventMsg::Warning(WarningEvent { message }), + ) + .await; + } + } // Required to run in a dedicated blocking pool. match tokio::task::spawn_blocking(move || { - let options = CreateGhostCommitOptions::new(&repo_path); + let options = + CreateGhostCommitOptions::new(&repo_path).ghost_snapshot(ghost_snapshot); create_ghost_commit(&options) }) .await @@ -161,10 +174,31 @@ impl GhostSnapshotTask { } } -fn format_large_untracked_warning(report: &GhostSnapshotReport) -> Option { +fn format_snapshot_warnings( + ignore_large_untracked_files: Option, + ignore_large_untracked_dirs: Option, + report: &GhostSnapshotReport, +) -> Vec { + let mut warnings = Vec::new(); + if let Some(message) = format_large_untracked_warning(ignore_large_untracked_dirs, report) { + warnings.push(message); + } + if let Some(message) = + format_ignored_untracked_files_warning(ignore_large_untracked_files, report) + { + warnings.push(message); + } + warnings +} + +fn format_large_untracked_warning( + ignore_large_untracked_dirs: Option, + report: &GhostSnapshotReport, +) -> Option { if report.large_untracked_dirs.is_empty() { return None; } + let threshold = ignore_large_untracked_dirs?; const MAX_DIRS: usize = 3; let mut parts: Vec = Vec::new(); for dir in 
report.large_untracked_dirs.iter().take(MAX_DIRS) { @@ -175,7 +209,85 @@ fn format_large_untracked_warning(report: &GhostSnapshotReport) -> Option= {threshold} files): {}. These directories are excluded from snapshots and undo cleanup. Adjust `ghost_snapshot.ignore_large_untracked_dirs` to change this behavior.", + parts.join(", ") + )) +} + +fn format_ignored_untracked_files_warning( + ignore_large_untracked_files: Option, + report: &GhostSnapshotReport, +) -> Option { + let threshold = ignore_large_untracked_files?; + if report.ignored_untracked_files.is_empty() { + return None; + } + + const MAX_FILES: usize = 3; + let mut parts: Vec = Vec::new(); + for file in report.ignored_untracked_files.iter().take(MAX_FILES) { + parts.push(format!( + "{} ({})", + file.path.display(), + format_bytes(file.byte_size) + )); + } + if report.ignored_untracked_files.len() > MAX_FILES { + let remaining = report.ignored_untracked_files.len() - MAX_FILES; + parts.push(format!("{remaining} more")); + } + + Some(format!( + "Repository snapshot ignored untracked files larger than {}: {}. These files are preserved during undo cleanup, but their contents are not captured in the snapshot. Adjust `ghost_snapshot.ignore_large_untracked_files` to change this behavior. 
To avoid this message in the future, update your `.gitignore`.", + format_bytes(threshold), parts.join(", ") )) } + +fn format_bytes(bytes: i64) -> String { + const KIB: i64 = 1024; + const MIB: i64 = 1024 * 1024; + + if bytes >= MIB { + return format!("{} MiB", bytes / MIB); + } + if bytes >= KIB { + return format!("{} KiB", bytes / KIB); + } + format!("{bytes} B") +} + +#[cfg(test)] +mod tests { + use super::*; + use codex_git::LargeUntrackedDir; + use pretty_assertions::assert_eq; + use std::path::PathBuf; + + #[test] + fn large_untracked_warning_includes_threshold() { + let report = GhostSnapshotReport { + large_untracked_dirs: vec![LargeUntrackedDir { + path: PathBuf::from("models"), + file_count: 250, + }], + ignored_untracked_files: Vec::new(), + }; + + let message = format_large_untracked_warning(Some(200), &report).unwrap(); + assert!(message.contains(">= 200 files")); + } + + #[test] + fn large_untracked_warning_disabled_when_threshold_disabled() { + let report = GhostSnapshotReport { + large_untracked_dirs: vec![LargeUntrackedDir { + path: PathBuf::from("models"), + file_count: 250, + }], + ignored_untracked_files: Vec::new(), + }; + + assert_eq!(format_large_untracked_warning(None, &report), None); + } +} diff --git a/codex-rs/core/src/tasks/undo.rs b/codex-rs/core/src/tasks/undo.rs index 9862a7ecd1b..5da7edd16fa 100644 --- a/codex-rs/core/src/tasks/undo.rs +++ b/codex-rs/core/src/tasks/undo.rs @@ -8,7 +8,8 @@ use crate::state::TaskKind; use crate::tasks::SessionTask; use crate::tasks::SessionTaskContext; use async_trait::async_trait; -use codex_git::restore_ghost_commit; +use codex_git::RestoreGhostCommitOptions; +use codex_git::restore_ghost_commit_with_options; use codex_protocol::models::ResponseItem; use codex_protocol::user_input::UserInput; use tokio_util::sync::CancellationToken; @@ -85,9 +86,12 @@ impl SessionTask for UndoTask { let commit_id = ghost_commit.id().to_string(); let repo_path = ctx.cwd.clone(); - let restore_result = - 
tokio::task::spawn_blocking(move || restore_ghost_commit(&repo_path, &ghost_commit)) - .await; + let ghost_snapshot = ctx.ghost_snapshot.clone(); + let restore_result = tokio::task::spawn_blocking(move || { + let options = RestoreGhostCommitOptions::new(&repo_path).ghost_snapshot(ghost_snapshot); + restore_ghost_commit_with_options(&options, &ghost_commit) + }) + .await; match restore_result { Ok(Ok(())) => { diff --git a/codex-rs/utils/git/src/ghost_commits.rs b/codex-rs/utils/git/src/ghost_commits.rs index 8544525f0fa..9de2b52f199 100644 --- a/codex-rs/utils/git/src/ghost_commits.rs +++ b/codex-rs/utils/git/src/ghost_commits.rs @@ -23,8 +23,10 @@ use crate::operations::run_git_for_stdout_all; /// Default commit message used for ghost commits when none is provided. const DEFAULT_COMMIT_MESSAGE: &str = "codex snapshot"; -/// Default threshold that triggers a warning about large untracked directories. -const LARGE_UNTRACKED_WARNING_THRESHOLD: usize = 200; +/// Default threshold for ignoring large untracked directories. +const DEFAULT_IGNORE_LARGE_UNTRACKED_DIRS: i64 = 200; +/// Default threshold (10 MiB) for excluding large untracked files from ghost snapshots. +const DEFAULT_IGNORE_LARGE_UNTRACKED_FILES: i64 = 10 * 1024 * 1024; /// Directories that should always be ignored when capturing ghost snapshots, /// even if they are not listed in .gitignore. /// @@ -50,19 +52,49 @@ pub struct CreateGhostCommitOptions<'a> { pub repo_path: &'a Path, pub message: Option<&'a str>, pub force_include: Vec, + pub ghost_snapshot: GhostSnapshotConfig, +} + +/// Options to control ghost commit restoration. 
+pub struct RestoreGhostCommitOptions<'a> { + pub repo_path: &'a Path, + pub ghost_snapshot: GhostSnapshotConfig, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct GhostSnapshotConfig { + pub ignore_large_untracked_files: Option, + pub ignore_large_untracked_dirs: Option, +} + +impl Default for GhostSnapshotConfig { + fn default() -> Self { + Self { + ignore_large_untracked_files: Some(DEFAULT_IGNORE_LARGE_UNTRACKED_FILES), + ignore_large_untracked_dirs: Some(DEFAULT_IGNORE_LARGE_UNTRACKED_DIRS), + } + } } /// Summary produced alongside a ghost snapshot. #[derive(Debug, Default, Clone, PartialEq, Eq)] pub struct GhostSnapshotReport { pub large_untracked_dirs: Vec, + pub ignored_untracked_files: Vec, } /// Directory containing a large amount of untracked content. #[derive(Debug, Clone, PartialEq, Eq)] pub struct LargeUntrackedDir { pub path: PathBuf, - pub file_count: usize, + pub file_count: i64, +} + +/// Untracked file excluded from the snapshot because of its size. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct IgnoredUntrackedFile { + pub path: PathBuf, + pub byte_size: i64, } impl<'a> CreateGhostCommitOptions<'a> { @@ -72,6 +104,7 @@ impl<'a> CreateGhostCommitOptions<'a> { repo_path, message: None, force_include: Vec::new(), + ghost_snapshot: GhostSnapshotConfig::default(), } } @@ -81,6 +114,24 @@ impl<'a> CreateGhostCommitOptions<'a> { self } + pub fn ghost_snapshot(mut self, ghost_snapshot: GhostSnapshotConfig) -> Self { + self.ghost_snapshot = ghost_snapshot; + self + } + + /// Exclude untracked files larger than `bytes` from the snapshot commit. + /// + /// These files are still treated as untracked for preservation purposes (i.e. they will not be + /// deleted by undo), but they will not be captured in the snapshot tree. 
+ pub fn ignore_large_untracked_files(mut self, bytes: i64) -> Self { + if bytes > 0 { + self.ghost_snapshot.ignore_large_untracked_files = Some(bytes); + } else { + self.ghost_snapshot.ignore_large_untracked_files = None; + } + self + } + /// Supplies the entire force-include path list at once. pub fn force_include(mut self, paths: I) -> Self where @@ -100,8 +151,56 @@ impl<'a> CreateGhostCommitOptions<'a> { } } -fn detect_large_untracked_dirs(files: &[PathBuf], dirs: &[PathBuf]) -> Vec { - let mut counts: BTreeMap = BTreeMap::new(); +impl<'a> RestoreGhostCommitOptions<'a> { + /// Creates restore options scoped to the provided repository path. + pub fn new(repo_path: &'a Path) -> Self { + Self { + repo_path, + ghost_snapshot: GhostSnapshotConfig::default(), + } + } + + pub fn ghost_snapshot(mut self, ghost_snapshot: GhostSnapshotConfig) -> Self { + self.ghost_snapshot = ghost_snapshot; + self + } + + /// Exclude untracked files larger than `bytes` from undo cleanup. + /// + /// These files are treated as "always preserve" to avoid deleting large local artifacts. + pub fn ignore_large_untracked_files(mut self, bytes: i64) -> Self { + if bytes > 0 { + self.ghost_snapshot.ignore_large_untracked_files = Some(bytes); + } else { + self.ghost_snapshot.ignore_large_untracked_files = None; + } + self + } + + /// Ignore untracked directories that contain at least `file_count` untracked files. 
+ pub fn ignore_large_untracked_dirs(mut self, file_count: i64) -> Self { + if file_count > 0 { + self.ghost_snapshot.ignore_large_untracked_dirs = Some(file_count); + } else { + self.ghost_snapshot.ignore_large_untracked_dirs = None; + } + self + } +} + +fn detect_large_untracked_dirs( + files: &[PathBuf], + dirs: &[PathBuf], + threshold: Option, +) -> Vec { + let Some(threshold) = threshold else { + return Vec::new(); + }; + if threshold <= 0 { + return Vec::new(); + } + + let mut counts: BTreeMap = BTreeMap::new(); let mut sorted_dirs: Vec<&PathBuf> = dirs.iter().collect(); sorted_dirs.sort_by(|a, b| { @@ -129,7 +228,7 @@ fn detect_large_untracked_dirs(files: &[PathBuf], dirs: &[PathBuf]) -> Vec = counts .into_iter() - .filter(|(_, count)| *count >= LARGE_UNTRACKED_WARNING_THRESHOLD) + .filter(|(_, count)| *count >= threshold) .map(|(path, file_count)| LargeUntrackedDir { path, file_count }) .collect(); result.sort_by(|a, b| { @@ -165,22 +264,35 @@ pub fn capture_ghost_snapshot_report( let repo_root = resolve_repository_root(options.repo_path)?; let repo_prefix = repo_subdir(repo_root.as_path(), options.repo_path); - let existing_untracked = - capture_existing_untracked(repo_root.as_path(), repo_prefix.as_deref())?; + let force_include = prepare_force_include(repo_prefix.as_deref(), &options.force_include)?; + let existing_untracked = capture_existing_untracked( + repo_root.as_path(), + repo_prefix.as_deref(), + options.ghost_snapshot.ignore_large_untracked_files, + options.ghost_snapshot.ignore_large_untracked_dirs, + &force_include, + )?; - let warning_files = existing_untracked - .files + let warning_ignored_files = existing_untracked + .ignored_untracked_files .iter() - .map(|path| to_session_relative_path(path, repo_prefix.as_deref())) + .map(|file| IgnoredUntrackedFile { + path: to_session_relative_path(file.path.as_path(), repo_prefix.as_deref()), + byte_size: file.byte_size, + }) .collect::>(); - let warning_dirs = existing_untracked - .dirs + let 
warning_ignored_dirs = existing_untracked + .ignored_large_untracked_dirs .iter() - .map(|path| to_session_relative_path(path, repo_prefix.as_deref())) + .map(|dir| LargeUntrackedDir { + path: to_session_relative_path(dir.path.as_path(), repo_prefix.as_deref()), + file_count: dir.file_count, + }) .collect::>(); Ok(GhostSnapshotReport { - large_untracked_dirs: detect_large_untracked_dirs(&warning_files, &warning_dirs), + large_untracked_dirs: warning_ignored_dirs, + ignored_untracked_files: warning_ignored_files, }) } @@ -193,28 +305,31 @@ pub fn create_ghost_commit_with_report( let repo_root = resolve_repository_root(options.repo_path)?; let repo_prefix = repo_subdir(repo_root.as_path(), options.repo_path); let parent = resolve_head(repo_root.as_path())?; - let existing_untracked = - capture_existing_untracked(repo_root.as_path(), repo_prefix.as_deref())?; + let force_include = prepare_force_include(repo_prefix.as_deref(), &options.force_include)?; + let existing_untracked = capture_existing_untracked( + repo_root.as_path(), + repo_prefix.as_deref(), + options.ghost_snapshot.ignore_large_untracked_files, + options.ghost_snapshot.ignore_large_untracked_dirs, + &force_include, + )?; - let warning_files = existing_untracked - .files + let warning_ignored_files = existing_untracked + .ignored_untracked_files .iter() - .map(|path| to_session_relative_path(path, repo_prefix.as_deref())) + .map(|file| IgnoredUntrackedFile { + path: to_session_relative_path(file.path.as_path(), repo_prefix.as_deref()), + byte_size: file.byte_size, + }) .collect::>(); - let warning_dirs = existing_untracked - .dirs + let large_untracked_dirs = existing_untracked + .ignored_large_untracked_dirs .iter() - .map(|path| to_session_relative_path(path, repo_prefix.as_deref())) + .map(|dir| LargeUntrackedDir { + path: to_session_relative_path(dir.path.as_path(), repo_prefix.as_deref()), + file_count: dir.file_count, + }) .collect::>(); - let large_untracked_dirs = 
detect_large_untracked_dirs(&warning_files, &warning_dirs); - - let normalized_force = options - .force_include - .iter() - .map(|path| normalize_relative_path(path)) - .collect::, _>>()?; - let force_include = - apply_repo_prefix_to_force_include(repo_prefix.as_deref(), &normalized_force); let index_tempdir = Builder::new().prefix("codex-git-index-").tempdir()?; let index_path = index_tempdir.path().join("index"); let base_env = vec![( @@ -238,6 +353,16 @@ pub fn create_ghost_commit_with_report( } run_git_for_status(repo_root.as_path(), add_args, Some(base_env.as_slice()))?; + remove_large_untracked_dirs_from_index( + repo_root.as_path(), + base_env.as_slice(), + &existing_untracked.ignored_large_untracked_dir_files, + )?; + remove_large_untracked_files_from_index( + repo_root.as_path(), + base_env.as_slice(), + &existing_untracked.ignored_untracked_files, + )?; if !force_include.is_empty() { let mut args = Vec::with_capacity(force_include.len() + 2); args.push(OsString::from("add")); @@ -278,26 +403,46 @@ pub fn create_ghost_commit_with_report( let ghost_commit = GhostCommit::new( commit_id, parent, - existing_untracked.files, - existing_untracked.dirs, + merge_preserved_untracked_files( + existing_untracked.files, + &existing_untracked.ignored_untracked_files, + ), + merge_preserved_untracked_dirs( + existing_untracked.dirs, + &existing_untracked.ignored_large_untracked_dirs, + ), ); Ok(( ghost_commit, GhostSnapshotReport { large_untracked_dirs, + ignored_untracked_files: warning_ignored_files, }, )) } /// Restore the working tree to match the provided ghost commit. 
pub fn restore_ghost_commit(repo_path: &Path, commit: &GhostCommit) -> Result<(), GitToolingError> { - ensure_git_repository(repo_path)?; + restore_ghost_commit_with_options(&RestoreGhostCommitOptions::new(repo_path), commit) +} - let repo_root = resolve_repository_root(repo_path)?; - let repo_prefix = repo_subdir(repo_root.as_path(), repo_path); - let current_untracked = - capture_existing_untracked(repo_root.as_path(), repo_prefix.as_deref())?; +/// Restore the working tree using the provided options. +pub fn restore_ghost_commit_with_options( + options: &RestoreGhostCommitOptions<'_>, + commit: &GhostCommit, +) -> Result<(), GitToolingError> { + ensure_git_repository(options.repo_path)?; + + let repo_root = resolve_repository_root(options.repo_path)?; + let repo_prefix = repo_subdir(repo_root.as_path(), options.repo_path); + let current_untracked = capture_existing_untracked( + repo_root.as_path(), + repo_prefix.as_deref(), + options.ghost_snapshot.ignore_large_untracked_files, + options.ghost_snapshot.ignore_large_untracked_dirs, + &[], + )?; restore_to_commit_inner(repo_root.as_path(), repo_prefix.as_deref(), commit.id())?; remove_new_untracked( repo_root.as_path(), @@ -345,6 +490,9 @@ fn restore_to_commit_inner( struct UntrackedSnapshot { files: Vec, dirs: Vec, + ignored_untracked_files: Vec, + ignored_large_untracked_dirs: Vec, + ignored_large_untracked_dir_files: Vec, } /// Captures the untracked and ignored entries under `repo_root`, optionally limited by `repo_prefix`. @@ -352,6 +500,9 @@ struct UntrackedSnapshot { fn capture_existing_untracked( repo_root: &Path, repo_prefix: Option<&Path>, + ignore_large_untracked_files: Option, + ignore_large_untracked_dirs: Option, + force_include: &[PathBuf], ) -> Result { // Ask git for the zero-delimited porcelain status so we can enumerate // every untracked path (including ones filtered by prefix). 
@@ -372,6 +523,7 @@ fn capture_existing_untracked( } let mut snapshot = UntrackedSnapshot::default(); + let mut untracked_files_for_dir_scan: Vec = Vec::new(); // Each entry is of the form "<code> <path>" where code is '?' (untracked) // or '!' (ignored); everything else is irrelevant to this snapshot. for entry in output.split('\0') { @@ -399,11 +551,64 @@ fn capture_existing_untracked( let is_dir = absolute.is_dir(); if is_dir { snapshot.dirs.push(normalized); + } else if code == "?" { + untracked_files_for_dir_scan.push(normalized.clone()); + if let Some(threshold) = ignore_large_untracked_files + && threshold > 0 + && !is_force_included(&normalized, force_include) + && let Ok(Some(byte_size)) = untracked_file_size(&absolute) + && byte_size > threshold + { + snapshot.ignored_untracked_files.push(IgnoredUntrackedFile { + path: normalized, + byte_size, + }); + } else { + snapshot.files.push(normalized); + } } else { snapshot.files.push(normalized); } } + if let Some(threshold) = ignore_large_untracked_dirs + && threshold > 0 + { + let ignored_large_untracked_dirs = detect_large_untracked_dirs( + &untracked_files_for_dir_scan, + &snapshot.dirs, + Some(threshold), + ) + .into_iter() + .filter(|entry| !entry.path.as_os_str().is_empty() && entry.path != Path::new(".")) + .collect::>(); + + if !ignored_large_untracked_dirs.is_empty() { + let ignored_dir_paths = ignored_large_untracked_dirs + .iter() + .map(|entry| entry.path.as_path()) + .collect::>(); + + snapshot + .files + .retain(|path| !ignored_dir_paths.iter().any(|dir| path.starts_with(dir))); + snapshot + .dirs + .retain(|path| !ignored_dir_paths.iter().any(|dir| path.starts_with(dir))); + snapshot.ignored_untracked_files.retain(|file| { + !ignored_dir_paths + .iter() + .any(|dir| file.path.starts_with(dir)) + }); + + snapshot.ignored_large_untracked_dir_files = untracked_files_for_dir_scan + .into_iter() + .filter(|path| ignored_dir_paths.iter().any(|dir| path.starts_with(dir))) + .collect(); + 
snapshot.ignored_large_untracked_dirs = ignored_large_untracked_dirs; + } + } + Ok(snapshot) } @@ -420,6 +625,111 @@ fn should_ignore_for_snapshot(path: &Path) -> bool { }) } +fn prepare_force_include( + repo_prefix: Option<&Path>, + force_include: &[PathBuf], +) -> Result, GitToolingError> { + let normalized_force = force_include + .iter() + .map(PathBuf::as_path) + .map(normalize_relative_path) + .collect::, _>>()?; + Ok(apply_repo_prefix_to_force_include( + repo_prefix, + &normalized_force, + )) +} + +fn is_force_included(path: &Path, force_include: &[PathBuf]) -> bool { + force_include + .iter() + .any(|candidate| path.starts_with(candidate.as_path())) +} + +fn untracked_file_size(path: &Path) -> io::Result> { + let Ok(metadata) = fs::symlink_metadata(path) else { + return Ok(None); + }; + + let Ok(len_i64) = i64::try_from(metadata.len()) else { + return Ok(Some(i64::MAX)); + }; + Ok(Some(len_i64)) +} + +fn remove_large_untracked_files_from_index( + repo_root: &Path, + env: &[(OsString, OsString)], + ignored: &[IgnoredUntrackedFile], +) -> Result<(), GitToolingError> { + let paths = ignored + .iter() + .map(|entry| entry.path.clone()) + .collect::>(); + remove_paths_from_index(repo_root, env, &paths) +} + +fn remove_large_untracked_dirs_from_index( + repo_root: &Path, + env: &[(OsString, OsString)], + paths: &[PathBuf], +) -> Result<(), GitToolingError> { + remove_paths_from_index(repo_root, env, paths) +} + +fn remove_paths_from_index( + repo_root: &Path, + env: &[(OsString, OsString)], + paths: &[PathBuf], +) -> Result<(), GitToolingError> { + if paths.is_empty() { + return Ok(()); + } + + const CHUNK_SIZE: usize = 64; + for chunk in paths.chunks(CHUNK_SIZE) { + let mut args = vec![ + OsString::from("update-index"), + OsString::from("--force-remove"), + OsString::from("--"), + ]; + args.extend(chunk.iter().map(|path| path.as_os_str().to_os_string())); + run_git_for_status(repo_root, args, Some(env))?; + } + + Ok(()) +} + +fn merge_preserved_untracked_files( + 
mut files: Vec, + ignored: &[IgnoredUntrackedFile], +) -> Vec { + if ignored.is_empty() { + return files; + } + + files.extend(ignored.iter().map(|entry| entry.path.clone())); + files +} + +fn merge_preserved_untracked_dirs( + mut dirs: Vec, + ignored_large_dirs: &[LargeUntrackedDir], +) -> Vec { + if ignored_large_dirs.is_empty() { + return dirs; + } + + for entry in ignored_large_dirs { + if dirs.iter().any(|dir| dir == &entry.path) { + continue; + } + dirs.push(entry.path.clone()); + } + + dirs +} + /// Removes untracked files and directories that were not present when the snapshot was captured. fn remove_new_untracked( repo_root: &Path, @@ -514,6 +824,7 @@ mod tests { use crate::operations::run_git_for_stdout; use assert_matches::assert_matches; use pretty_assertions::assert_eq; + use std::fs::File; use std::process::Command; use walkdir::WalkDir; @@ -544,6 +855,14 @@ mod tests { run_git_in(repo, &["config", "core.autocrlf", "false"]); } + fn create_sparse_file(path: &Path, bytes: i64) -> io::Result<()> { + let file_len = + u64::try_from(bytes).map_err(|_| io::Error::from(io::ErrorKind::InvalidInput))?; + let file = File::create(path)?; + file.set_len(file_len)?; + Ok(()) + } + #[test] /// Verifies a ghost commit can be created and restored end to end. 
fn create_and_restore_roundtrip() -> Result<(), GitToolingError> { @@ -615,6 +934,62 @@ mod tests { Ok(()) } + #[test] + fn snapshot_ignores_large_untracked_files() -> Result<(), GitToolingError> { + let temp = tempfile::tempdir()?; + let repo = temp.path(); + init_test_repo(repo); + + std::fs::write(repo.join("tracked.txt"), "contents\n")?; + run_git_in(repo, &["add", "tracked.txt"]); + run_git_in( + repo, + &[ + "-c", + "user.name=Tester", + "-c", + "user.email=test@example.com", + "commit", + "-m", + "initial", + ], + ); + + let big = repo.join("big.bin"); + let big_size = 2 * 1024 * 1024; + create_sparse_file(&big, big_size)?; + + let (ghost, report) = create_ghost_commit_with_report( + &CreateGhostCommitOptions::new(repo).ignore_large_untracked_files(1024), + )?; + assert!(ghost.parent().is_some()); + assert_eq!( + report.ignored_untracked_files, + vec![IgnoredUntrackedFile { + path: PathBuf::from("big.bin"), + byte_size: big_size, + }] + ); + + let exists_in_commit = Command::new("git") + .current_dir(repo) + .args(["cat-file", "-e", &format!("{}:big.bin", ghost.id())]) + .status() + .map(|status| status.success()) + .unwrap_or(false); + assert!(!exists_in_commit); + + std::fs::write(repo.join("ephemeral.txt"), "temp\n")?; + restore_ghost_commit(repo, &ghost)?; + assert!( + big.exists(), + "big.bin should be preserved during undo cleanup" + ); + assert!(!repo.join("ephemeral.txt").exists()); + + Ok(()) + } + #[test] fn create_snapshot_reports_large_untracked_dirs() -> Result<(), GitToolingError> { let temp = tempfile::tempdir()?; @@ -638,7 +1013,8 @@ mod tests { let models = repo.join("models"); std::fs::create_dir(&models)?; - for idx in 0..(LARGE_UNTRACKED_WARNING_THRESHOLD + 1) { + let threshold = DEFAULT_IGNORE_LARGE_UNTRACKED_DIRS; + for idx in 0..(threshold + 1) { let file = models.join(format!("weights-{idx}.bin")); std::fs::write(file, "data\n")?; } @@ -650,10 +1026,83 @@ mod tests { report.large_untracked_dirs, vec![LargeUntrackedDir { path: 
PathBuf::from("models"), - file_count: LARGE_UNTRACKED_WARNING_THRESHOLD + 1, + file_count: threshold + 1, }] ); + let exists_in_commit = Command::new("git") + .current_dir(repo) + .args([ + "cat-file", + "-e", + &format!("{}:models/weights-0.bin", ghost.id()), + ]) + .status() + .map(|status| status.success()) + .unwrap_or(false); + assert!(!exists_in_commit); + + std::fs::write(repo.join("ephemeral.txt"), "temp\n")?; + restore_ghost_commit(repo, &ghost)?; + assert!( + repo.join("models/weights-0.bin").exists(), + "ignored untracked directories should be preserved during undo cleanup" + ); + assert!(!repo.join("ephemeral.txt").exists()); + + Ok(()) + } + + #[test] + fn restore_preserves_large_untracked_dirs_when_threshold_disabled() + -> Result<(), GitToolingError> { + let temp = tempfile::tempdir()?; + let repo = temp.path(); + init_test_repo(repo); + + std::fs::write(repo.join("tracked.txt"), "contents\n")?; + run_git_in(repo, &["add", "tracked.txt"]); + run_git_in( + repo, + &[ + "-c", + "user.name=Tester", + "-c", + "user.email=test@example.com", + "commit", + "-m", + "initial", + ], + ); + + let models = repo.join("models"); + std::fs::create_dir(&models)?; + let threshold: i64 = 2; + for idx in 0..(threshold + 1) { + let file = models.join(format!("weights-{idx}.bin")); + std::fs::write(file, "data\n")?; + } + + let snapshot_config = GhostSnapshotConfig { + ignore_large_untracked_files: Some(DEFAULT_IGNORE_LARGE_UNTRACKED_FILES), + ignore_large_untracked_dirs: Some(threshold), + }; + let (ghost, _report) = create_ghost_commit_with_report( + &CreateGhostCommitOptions::new(repo).ghost_snapshot(snapshot_config), + )?; + + std::fs::write(repo.join("ephemeral.txt"), "temp\n")?; + restore_ghost_commit_with_options( + &RestoreGhostCommitOptions::new(repo).ignore_large_untracked_dirs(0), + &ghost, + )?; + + assert!( + repo.join("models/weights-0.bin").exists(), + "ignored untracked directories should be preserved during undo cleanup, even when the threshold is 
disabled at restore time" + ); + assert!(!repo.join("ephemeral.txt").exists()); + Ok(()) } @@ -847,12 +1296,14 @@ mod tests { // Create a large untracked tree nested under the tracked src directory. let generated = src.join("generated").join("cache"); std::fs::create_dir_all(&generated)?; - for idx in 0..(LARGE_UNTRACKED_WARNING_THRESHOLD + 1) { + let threshold = DEFAULT_IGNORE_LARGE_UNTRACKED_DIRS; + for idx in 0..(threshold + 1) { let file = generated.join(format!("file-{idx}.bin")); std::fs::write(file, "data\n")?; } - let (_, report) = create_ghost_commit_with_report(&CreateGhostCommitOptions::new(repo))?; + let (ghost, report) = + create_ghost_commit_with_report(&CreateGhostCommitOptions::new(repo))?; assert_eq!(report.large_untracked_dirs.len(), 1); let entry = &report.large_untracked_dirs[0]; assert_ne!(entry.path, PathBuf::from("src")); @@ -861,7 +1312,19 @@ mod tests { "unexpected path for large untracked directory: {}", entry.path.display() ); - assert_eq!(entry.file_count, LARGE_UNTRACKED_WARNING_THRESHOLD + 1); + assert_eq!(entry.file_count, threshold + 1); + + let exists_in_commit = Command::new("git") + .current_dir(repo) + .args([ + "cat-file", + "-e", + &format!("{}:src/generated/cache/file-0.bin", ghost.id()), + ]) + .status() + .map(|status| status.success()) + .unwrap_or(false); + assert!(!exists_in_commit); Ok(()) } diff --git a/codex-rs/utils/git/src/lib.rs b/codex-rs/utils/git/src/lib.rs index 7dca3690629..c4db10fc69c 100644 --- a/codex-rs/utils/git/src/lib.rs +++ b/codex-rs/utils/git/src/lib.rs @@ -17,12 +17,16 @@ pub use apply::stage_paths; pub use branch::merge_base_with_head; pub use errors::GitToolingError; pub use ghost_commits::CreateGhostCommitOptions; +pub use ghost_commits::GhostSnapshotConfig; pub use ghost_commits::GhostSnapshotReport; +pub use ghost_commits::IgnoredUntrackedFile; pub use ghost_commits::LargeUntrackedDir; +pub use ghost_commits::RestoreGhostCommitOptions; pub use ghost_commits::capture_ghost_snapshot_report; pub 
use ghost_commits::create_ghost_commit; pub use ghost_commits::create_ghost_commit_with_report; pub use ghost_commits::restore_ghost_commit; +pub use ghost_commits::restore_ghost_commit_with_options; pub use ghost_commits::restore_to_commit; pub use platform::create_symlink; use schemars::JsonSchema; diff --git a/docs/config.md b/docs/config.md index ca4ba015e93..febb69a470c 100644 --- a/docs/config.md +++ b/docs/config.md @@ -943,6 +943,8 @@ Valid values: | `tui.animations` | boolean | Enable terminal animations (welcome screen, shimmer, spinner). Defaults to true; set to `false` to disable visual motion. | | `instructions` | string | Currently ignored; use `experimental_instructions_file` or `AGENTS.md`. | | `features.<feature-flag>` | boolean | See [feature flags](#feature-flags) for details | +| `ghost_snapshot.ignore_large_untracked_files` | number | Exclude untracked files larger than this many bytes from ghost snapshots (default: 10 MiB); excluded files are left in place by undo. Set to `0` to disable. | +| `ghost_snapshot.ignore_large_untracked_dirs` | number | Exclude untracked directories containing at least this many files from ghost snapshots and undo cleanup (default: 200); a warning is still emitted. Set to `0` to disable. | | `mcp_servers.<id>.command` | string | MCP server launcher command (stdio servers only). | | `mcp_servers.<id>.args` | array | MCP server args (stdio servers only). | | `mcp_servers.<id>.env` | map | MCP server env vars (stdio servers only). |