diff --git a/inc/Workspace/Workspace.php b/inc/Workspace/Workspace.php index b1b6e1d..d38be4b 100644 --- a/inc/Workspace/Workspace.php +++ b/inc/Workspace/Workspace.php @@ -659,6 +659,31 @@ public function worktree_cleanup_merged( array $opts = array() ): array|\WP_Erro $dirty_count = (int) $dirty_probe; if ( $dirty_count > 0 && ! $force ) { + $artifact_dirty = $this->classify_artifact_only_dirty_worktree($repo, $wt_path); + if ( is_array($artifact_dirty) ) { + $skipped[] = array_merge( + array( + 'handle' => $handle, + 'repo' => $repo, + 'branch' => $branch, + 'path' => $wt_path, + 'reason_code' => 'artifact_only_dirty_worktree', + 'reason' => sprintf('working tree dirty only from declared/generated artifact paths (%d files) - run artifact cleanup instead of force-removing the worktree', $dirty_count), + 'dirty' => $dirty_count, + 'hint' => 'Run studio wp datamachine-code workspace worktree cleanup-artifacts --dry-run to review reconstructable artifact cleanup; source edits are still protected by dirty_worktree.', + 'created_at' => $created_at, + 'metadata' => $metadata, + ), $disk_fields, array( + 'artifact_dirty_paths' => $artifact_dirty['paths'], + 'artifacts' => $artifact_dirty['artifacts'], + 'artifact_size_bytes' => $artifact_dirty['artifact_size_bytes'], + 'size_bytes' => max( (int) ( $disk_fields['size_bytes'] ?? 0 ), (int) $artifact_dirty['artifact_size_bytes'] ), + 'estimated_size_bytes' => max( (int) ( $disk_fields['estimated_size_bytes'] ?? 0 ), (int) $artifact_dirty['artifact_size_bytes'] ), + ) + ); + continue; + } + // Before falling through to the generic dirty_worktree skip, try to // classify whether this is the "merged PR + obsolete dirty edits" // shape. That bucket is still skipped (force=false stays safe), but @@ -3206,6 +3231,7 @@ private function worktree_cleanup_buckets( int $candidate_count, array $candidat + (int) ( $skipped_by_reason['unpushed_commits'] ?? 
/**
 * Classify dirty worktrees whose dirty paths are only generated artifacts.
 *
 * A classification is returned only when every path reported by
 * `git status --porcelain` is equal to, or nested under, an artifact path
 * returned by detect_worktree_artifacts(). Any doubt (git failure, a status
 * line that cannot be parsed, a path outside the artifact set) yields null so
 * the caller can fall through to its generic dirty-worktree handling.
 *
 * Rename/copy entries ("R  old -> new") contribute BOTH paths: a source file
 * moved into an artifact directory is still a source edit and must not be
 * reported as artifact-only.
 *
 * @param string $repo Repo name.
 * @param string $path Worktree path.
 * @return array{paths: list<string>, artifacts: array<int, array<string, mixed>>, artifact_size_bytes: int}|null Artifact-only classification, or null when the worktree cannot be proven artifact-only.
 */
private function classify_artifact_only_dirty_worktree( string $repo, string $path ): ?array {
	if ( '' === $repo || '' === $path || ! is_dir($path) ) {
		return null;
	}

	$artifacts = $this->detect_worktree_artifacts($repo, $path);
	if ( array() === $artifacts ) {
		return null;
	}

	$status = $this->run_git($path, 'status --porcelain', self::CLEANUP_GIT_PROBE_TIMEOUT);
	if ( is_wp_error($status) ) {
		return null;
	}

	// Git wraps paths containing whitespace or special bytes in C-style
	// double quotes; undo that so prefix matching sees the real path.
	$normalize = static function ( string $raw ): string {
		$raw = trim($raw);
		if ( strlen($raw) >= 2 && '"' === $raw[0] && '"' === substr($raw, -1) ) {
			// Quoted: inner whitespace is significant, only strip slashes.
			return trim(stripcslashes(substr($raw, 1, -1)), '/');
		}
		return trim($raw, ' /');
	};

	$dirty_paths = array();
	foreach ( explode("\n", (string) ( $status['output'] ?? '' )) as $line ) {
		$line = rtrim($line, "\r");
		if ( '' === $line ) {
			continue;
		}

		// Porcelain v1 lines are "XY <path>". Anything else cannot be parsed
		// safely, so refuse to classify rather than guess at the path.
		// NOTE(review): if run_git() trims its output, a leading " M" status
		// loses its first column and lands here - confirm against run_git().
		if ( strlen($line) < 4 || ' ' !== $line[2] ) {
			return null;
		}

		$code  = substr($line, 0, 2);
		$entry = substr($line, 3);

		// Only rename/copy records carry "old -> new"; an arrow in any other
		// record is part of the file name itself.
		$is_rename = str_contains($code, 'R') || str_contains($code, 'C');
		$fields    = ( $is_rename && str_contains($entry, ' -> ') )
			? explode(' -> ', $entry, 2)
			: array( $entry );

		foreach ( $fields as $field ) {
			$clean = $normalize($field);
			if ( '' === $clean ) {
				return null;
			}
			$dirty_paths[] = $clean;
		}
	}

	if ( array() === $dirty_paths ) {
		return null;
	}

	// Normalise artifact roots once and drop empties so they can never act
	// as a match-everything prefix.
	$artifact_paths = array_filter(
		array_map(static fn( $artifact ) => trim( (string) ( $artifact['path'] ?? '' ), '/'), $artifacts),
		static fn( $artifact_path ) => '' !== $artifact_path
	);

	foreach ( $dirty_paths as $dirty_path ) {
		$matched = false;
		foreach ( $artifact_paths as $artifact_path ) {
			if ( $dirty_path === $artifact_path || str_starts_with($dirty_path, $artifact_path . '/') ) {
				$matched = true;
				break;
			}
		}
		if ( ! $matched ) {
			// At least one dirty path is real source: not artifact-only.
			return null;
		}
	}

	return array(
		'paths'               => $dirty_paths,
		'artifacts'           => $artifacts,
		'artifact_size_bytes' => array_sum(array_map(static fn( $artifact ) => (int) ( $artifact['size_bytes'] ?? 0 ), $artifacts)),
	);
}
'/package.json') ) { $profile['node_modules'] = 'Node dependencies'; + $profile['build'] = 'JavaScript build output'; $profile['.next'] = 'Next.js build cache'; $profile['dist'] = 'JavaScript build output'; $profile['coverage'] = 'test coverage output'; diff --git a/tests/smoke-worktree-cleanup.php b/tests/smoke-worktree-cleanup.php index f80e198..8587472 100644 --- a/tests/smoke-worktree-cleanup.php +++ b/tests/smoke-worktree-cleanup.php @@ -220,9 +220,10 @@ public function worktree_list( ?string $repo = null, ?string $state = null, arra $run('git config user.name test', $primary); file_put_contents($primary . '/README.md', "demo\n"); file_put_contents($primary . '/Cargo.toml', "[package]\nname = \"demo\"\nversion = \"0.1.0\"\n"); + file_put_contents($primary . '/package.json', "{\"scripts\":{\"build\":\"echo build\"}}\n"); file_put_contents($primary . '/.gitignore', "target/\n"); $run('git add README.md && git commit -m init', $primary); - $run('git add Cargo.toml .gitignore && git commit -m tooling', $primary); + $run('git add Cargo.toml package.json .gitignore && git commit -m tooling', $primary); $run('git branch -M main', $primary); $run('git push -u origin main', $primary); @@ -243,6 +244,8 @@ public function worktree_list( ?string $repo = null, ?string $state = null, arra $make_branch('merged-live-remote', 'd'); // → simulate via PR-merged (stubbed → none) $make_branch('unmerged-feature', 'e'); // still active $make_branch('dirty-branch', 'f'); // will be dirty in worktree + $make_branch('artifact-only-dirty', 'f2'); // will be dirty only from build/ + $make_branch('mixed-artifact-dirty', 'f3'); // will have build/ plus a source edit $make_branch('external-branch', 'g'); // outside workspace, should never be removed // Create worktrees at various paths: @@ -257,12 +260,19 @@ public function worktree_list( ?string $repo = null, ?string $state = null, arra $run(sprintf('git worktree add %s merged-live-remote', escapeshellarg($tmp . 
'/demo-unmanaged-merged')), $primary); $run(sprintf('git worktree add %s unmerged-feature', escapeshellarg($tmp . '/demo@unmerged-feature')), $primary); $run(sprintf('git worktree add %s dirty-branch', escapeshellarg($tmp . '/demo@dirty-branch')), $primary); + $run(sprintf('git worktree add %s artifact-only-dirty', escapeshellarg($tmp . '/demo@artifact-only-dirty')), $primary); + $run(sprintf('git worktree add %s mixed-artifact-dirty', escapeshellarg($tmp . '/demo@mixed-artifact-dirty')), $primary); mkdir($tmp . '-external', 0755, true); $run(sprintf('git worktree add %s external-branch', escapeshellarg($tmp . '-external/demo-external')), $primary); $external_real = realpath($tmp . '-external/demo-external') ? realpath($tmp . '-external/demo-external') : $tmp . '-external/demo-external'; // Dirty the dirty worktree. file_put_contents($tmp . '/demo@dirty-branch/scratch.txt', 'dirty'); + mkdir($tmp . '/demo@artifact-only-dirty/build', 0755, true); + file_put_contents($tmp . '/demo@artifact-only-dirty/build/output.js', 'artifact'); + mkdir($tmp . '/demo@mixed-artifact-dirty/build', 0755, true); + file_put_contents($tmp . '/demo@mixed-artifact-dirty/build/output.js', 'artifact'); + file_put_contents($tmp . '/demo@mixed-artifact-dirty/README.md', "source edit\n"); mkdir($tmp . '/demo@merged-autodelete/target', 0755, true); file_put_contents($tmp . 
'/demo@merged-autodelete/target/artifact.bin', str_repeat('x', 4096)); @@ -352,6 +362,8 @@ public function worktree_list( ?string $repo = null, ?string $state = null, arra $run(sprintf('git --git-dir=%s update-ref -d refs/heads/merged-stale-plan', escapeshellarg($remote))); $run(sprintf('git --git-dir=%s update-ref -d refs/heads/merged-recent', escapeshellarg($remote))); $run(sprintf('git --git-dir=%s update-ref -d refs/heads/merged-unknown-age', escapeshellarg($remote))); + $run(sprintf('git --git-dir=%s update-ref -d refs/heads/artifact-only-dirty', escapeshellarg($remote))); + $run(sprintf('git --git-dir=%s update-ref -d refs/heads/mixed-artifact-dirty', escapeshellarg($remote))); // ------------------------------------------------------------------------- // Dry-run assertions @@ -394,6 +406,19 @@ public function worktree_list( ?string $repo = null, ?string $state = null, arra $assert(true, str_contains($dirty_reason, 'dirty'), 'dirty skip reason mentions dirty'); $assert('dirty_worktree', $dirty_row['reason_code'] ?? '', 'dirty skip exposes stable reason code'); + $artifact_dirty_skips = array_filter($plan['skipped'] ?? array(), fn( $s ) => ( $s['handle'] ?? '' ) === 'demo@artifact-only-dirty'); + $assert(1, count($artifact_dirty_skips), 'artifact-only dirty worktree skipped with exactly one entry'); + $artifact_dirty_row = array_values($artifact_dirty_skips)[0] ?? array(); + $assert('artifact_only_dirty_worktree', $artifact_dirty_row['reason_code'] ?? '', 'artifact-only dirt exposes stable reason code'); + $assert(array( 'build' ), $artifact_dirty_row['artifact_dirty_paths'] ?? array(), 'artifact-only dirty row reports dirty artifact path'); + $assert('build', $artifact_dirty_row['artifacts'][0]['path'] ?? '', 'artifact-only dirty row reports matching artifact profile path'); + $assert(true, str_contains($artifact_dirty_row['hint'] ?? 
'', 'cleanup-artifacts --dry-run'), 'artifact-only dirty row points to artifact cleanup lane'); + + $mixed_dirty_skips = array_filter($plan['skipped'] ?? array(), fn( $s ) => ( $s['handle'] ?? '' ) === 'demo@mixed-artifact-dirty'); + $assert(1, count($mixed_dirty_skips), 'mixed source/artifact dirty worktree skipped with exactly one entry'); + $mixed_dirty_row = array_values($mixed_dirty_skips)[0] ?? array(); + $assert('dirty_worktree', $mixed_dirty_row['reason_code'] ?? '', 'mixed source/artifact dirt stays protected as generic dirty worktree'); + // unmerged-feature should be skipped (no merge signal) $unmerged = array_filter($plan['skipped'] ?? array(), fn( $s ) => ( $s['handle'] ?? '' ) === 'demo@unmerged-feature'); $assert(1, count($unmerged), 'unmerged worktree skipped with exactly one entry'); @@ -410,7 +435,10 @@ public function worktree_list( ?string $repo = null, ?string $state = null, arra $assert(true, str_contains($external_row['hint'] ?? '', 'outside the DMC workspace'), 'external worktree includes remediation hint'); $assert(4, (int) ( $plan['summary']['would_remove'] ?? 0 ), 'summary counts cleanup candidates'); - $assert(1, (int) ( $plan['summary']['skipped_by_reason']['dirty_worktree'] ?? 0 ), 'summary counts dirty skips by reason'); + $assert(2, (int) ( $plan['summary']['skipped_by_reason']['dirty_worktree'] ?? 0 ), 'summary counts dirty skips by reason'); + $assert(1, (int) ( $plan['summary']['skipped_by_reason']['artifact_only_dirty_worktree'] ?? 0 ), 'summary counts artifact-only dirty skips separately'); + $assert(1, (int) ( $plan['summary']['cleanup_buckets']['artifact_only_dirty_worktree'] ?? 0 ), 'cleanup buckets expose artifact-only dirty count'); + $assert(true, in_array('artifact_only_dirty_worktree', array_column($plan['summary']['skipped_next_commands'] ?? 
array(), 'reason_code'), true), 'summary exposes artifact-only dirty next command'); $assert(true, isset($plan['summary']['skipped_by_reason']['no_merge_signal']), 'summary includes no_merge_signal bucket'); $next_commands = (array) ( $plan['summary']['skipped_next_commands'] ?? array() ); $assert(true, in_array('no_merge_signal', array_column($next_commands, 'reason_code'), true), 'summary includes no_merge_signal next command'); @@ -444,12 +472,12 @@ public function worktree_list( ?string $repo = null, ?string $state = null, arra $assert('needs_metadata_reconcile', $inventory_missing['reason_code'] ?? '', 'inventory-only missing metadata requires metadata reconciliation'); $inventory_buckets = (array) ( $inventory_plan['summary']['cleanup_buckets'] ?? array() ); $assert(2, (int) ( $inventory_buckets['safe_to_remove_now'] ?? 0 ), 'inventory cleanup bucket counts safe-to-remove candidates'); - $assert(5, (int) ( $inventory_buckets['needs_reconciliation'] ?? 0 ), 'inventory cleanup bucket counts reconciliation candidates separately'); + $assert(7, (int) ( $inventory_buckets['needs_reconciliation'] ?? 0 ), 'inventory cleanup bucket counts reconciliation candidates separately'); $assert(4, (int) ( $inventory_buckets['needs_full_review'] ?? 0 ), 'inventory cleanup bucket counts full-review rows separately'); $assert(0, (int) ( $inventory_buckets['blocked_by_dirty_or_unpushed'] ?? -1 ), 'inventory cleanup bucket keeps dirty/unpushed blockers separate'); $assert(2, (int) ( $inventory_buckets['explicit_cleanup_candidates'] ?? 0 ), 'inventory cleanup bucket counts explicit cleanup candidates'); $assert(1, (int) ( $inventory_buckets['lifecycle_reconciliation_candidates'] ?? 0 ), 'inventory cleanup bucket counts lifecycle reconciliation candidates'); - $assert(4, (int) ( $inventory_buckets['metadata_reconciliation_candidates'] ?? 
0 ), 'inventory cleanup bucket counts metadata reconciliation candidates'); + $assert(6, (int) ( $inventory_buckets['metadata_reconciliation_candidates'] ?? 0 ), 'inventory cleanup bucket counts metadata reconciliation candidates'); $assert(4, (int) ( $inventory_buckets['active_no_signal'] ?? 0 ), 'inventory cleanup bucket counts active/no-signal rows'); $inventory_apply_hint = (array) ( $inventory_plan['summary']['bounded_cleanup_eligible_apply'] ?? array() ); $assert('studio wp datamachine-code workspace worktree bounded-cleanup-eligible-apply --limit=2', $inventory_plan['summary']['apply_command'] ?? '', 'inventory-only dry-run exposes bounded cleanup-eligible apply command');