Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions inc/Workspace/Workspace.php
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,31 @@ public function worktree_cleanup_merged( array $opts = array() ): array|\WP_Erro
$dirty_count = (int) $dirty_probe;

if ( $dirty_count > 0 && ! $force ) {
$artifact_dirty = $this->classify_artifact_only_dirty_worktree($repo, $wt_path);
if ( is_array($artifact_dirty) ) {
$skipped[] = array_merge(
array(
'handle' => $handle,
'repo' => $repo,
'branch' => $branch,
'path' => $wt_path,
'reason_code' => 'artifact_only_dirty_worktree',
'reason' => sprintf('working tree dirty only from declared/generated artifact paths (%d files) - run artifact cleanup instead of force-removing the worktree', $dirty_count),
'dirty' => $dirty_count,
'hint' => 'Run studio wp datamachine-code workspace worktree cleanup-artifacts --dry-run to review reconstructable artifact cleanup; source edits are still protected by dirty_worktree.',
'created_at' => $created_at,
'metadata' => $metadata,
), $disk_fields, array(
'artifact_dirty_paths' => $artifact_dirty['paths'],
'artifacts' => $artifact_dirty['artifacts'],
'artifact_size_bytes' => $artifact_dirty['artifact_size_bytes'],
'size_bytes' => max( (int) ( $disk_fields['size_bytes'] ?? 0 ), (int) $artifact_dirty['artifact_size_bytes'] ),
'estimated_size_bytes' => max( (int) ( $disk_fields['estimated_size_bytes'] ?? 0 ), (int) $artifact_dirty['artifact_size_bytes'] ),
)
);
continue;
}

// Before falling through to the generic dirty_worktree skip, try to
// classify whether this is the "merged PR + obsolete dirty edits"
// shape. That bucket is still skipped (force=false stays safe), but
Expand Down Expand Up @@ -3206,6 +3231,7 @@ private function worktree_cleanup_buckets( int $candidate_count, array $candidat
+ (int) ( $skipped_by_reason['unpushed_commits'] ?? 0 );

$buckets = array(
'artifact_only_dirty_worktree' => (int) ( $skipped_by_reason['artifact_only_dirty_worktree'] ?? 0 ),
'blocked_by_dirty_or_unpushed' => $blocked_by_dirty_or_unpushed,
'needs_full_review' => $needs_full_review,
'needs_reconciliation' => $needs_reconciliation,
Expand Down Expand Up @@ -3306,6 +3332,13 @@ private function build_no_merge_signal_evidence( string $primary_path, string $b
private function worktree_cleanup_skipped_next_commands( array $skipped_by_reason ): array {
$active_no_signal_commands = $this->build_active_no_signal_next_commands(25, 0);
$templates = array(
'artifact_only_dirty_worktree' => array(
'label' => 'Review generated artifact cleanup separately',
'command' => 'studio wp datamachine-code workspace worktree cleanup-artifacts --dry-run --format=json',
'alternative' => 'studio wp datamachine-code workspace cleanup run --mode=artifacts --dry-run',
'why' => 'Dirty paths are limited to declared reconstructable artifact directories, so artifact cleanup can shed them without force-removing source worktrees.',
'destructive' => false,
),
'lifecycle_reconciliation_candidate' => array(
'label' => 'Run DMC-owned lifecycle reconciliation before cleanup eligibility',
'command' => 'studio wp datamachine-code workspace worktree cleanup --dry-run --format=json',
Expand Down Expand Up @@ -3527,6 +3560,71 @@ private function detect_worktree_artifacts( string $repo, string $path ): array
return $rows;
}

/**
 * Classify dirty worktrees whose dirty paths are only generated artifacts.
 *
 * Every path reported by `git status --porcelain` must equal, or live beneath,
 * one of the artifact paths returned by detect_worktree_artifacts(). Rename and
 * copy entries contribute BOTH their origin and destination paths, so moving a
 * tracked source file into an artifact directory is not mistaken for
 * artifact-only dirt. Paths that git C-quotes (spaces, non-ASCII bytes, ...)
 * are unquoted before matching. Any status line that cannot be parsed with
 * confidence makes the method return null, which is the conservative answer.
 *
 * @param string $repo Repo name.
 * @param string $path Worktree path.
 * @return array{paths: array<int,string>, artifacts: array<int,array<string,mixed>>, artifact_size_bytes: int}|null Artifact-only classification, or null when the worktree has no detected artifacts, git status fails, or at least one dirty path is not covered by an artifact path.
 */
private function classify_artifact_only_dirty_worktree( string $repo, string $path ): ?array {
	if ( '' === $repo || '' === $path || ! is_dir($path) ) {
		return null;
	}

	$artifacts = $this->detect_worktree_artifacts($repo, $path);
	if ( array() === $artifacts ) {
		return null;
	}

	$status = $this->run_git($path, 'status --porcelain', self::CLEANUP_GIT_PROBE_TIMEOUT);
	if ( is_wp_error($status) ) {
		return null;
	}

	$dirty_paths = array();
	foreach ( explode("\n", (string) ( $status['output'] ?? '' )) as $line ) {
		$line = rtrim($line, "\r");
		if ( '' === $line ) {
			continue;
		}

		// Porcelain v1 lines are "XY <path>". If the separator is not where it
		// should be (e.g. leading whitespace of the output was stripped), the
		// path offset is unreliable, so refuse to classify.
		if ( strlen($line) < 4 || ' ' !== substr($line, 2, 1) ) {
			return null;
		}

		$entry = trim(substr($line, 3));

		// Rename/copy entries read "origin -> destination". Both sides are
		// dirty paths: the origin is a removal from wherever it used to live.
		$segments = str_contains($entry, ' -> ') ? explode(' -> ', $entry) : array( $entry );

		foreach ( $segments as $segment ) {
			$segment = trim($segment);

			// Git wraps paths with special characters in double quotes and
			// escapes them C-style; undo that so prefix matching works.
			if ( strlen($segment) >= 2 && str_starts_with($segment, '"') && str_ends_with($segment, '"') ) {
				$segment = stripcslashes(substr($segment, 1, -1));
			}

			// Untracked directories are reported with a trailing slash.
			$segment = trim($segment, ' /');
			if ( '' === $segment ) {
				return null;
			}

			$dirty_paths[] = $segment;
		}
	}

	if ( array() === $dirty_paths ) {
		return null;
	}

	$artifact_paths = array_map(fn( $artifact ) => trim( (string) ( $artifact['path'] ?? '' ), '/'), $artifacts);
	foreach ( $dirty_paths as $dirty_path ) {
		$matched = false;
		foreach ( $artifact_paths as $artifact_path ) {
			if ( '' === $artifact_path ) {
				continue;
			}
			// Match the artifact root itself or anything nested below it; the
			// appended slash prevents "build" from matching "builder/...".
			if ( $dirty_path === $artifact_path || str_starts_with($dirty_path, $artifact_path . '/') ) {
				$matched = true;
				break;
			}
		}
		if ( ! $matched ) {
			// A single non-artifact path means real edits are present.
			return null;
		}
	}

	return array(
		'paths'               => $dirty_paths,
		'artifacts'           => $artifacts,
		'artifact_size_bytes' => array_sum(array_map(fn( $artifact ) => (int) ( $artifact['size_bytes'] ?? 0 ), $artifacts)),
	);
}

/**
* Resolve repo-specific artifact profile paths.
*
Expand All @@ -3543,6 +3641,7 @@ private function get_worktree_artifact_profile( string $repo, string $path ): ar

if ( is_file(rtrim($path, '/') . '/package.json') ) {
$profile['node_modules'] = 'Node dependencies';
$profile['build'] = 'JavaScript build output';
$profile['.next'] = 'Next.js build cache';
$profile['dist'] = 'JavaScript build output';
$profile['coverage'] = 'test coverage output';
Expand Down
36 changes: 32 additions & 4 deletions tests/smoke-worktree-cleanup.php
Original file line number Diff line number Diff line change
Expand Up @@ -220,9 +220,10 @@ public function worktree_list( ?string $repo = null, ?string $state = null, arra
$run('git config user.name test', $primary);
file_put_contents($primary . '/README.md', "demo\n");
file_put_contents($primary . '/Cargo.toml', "[package]\nname = \"demo\"\nversion = \"0.1.0\"\n");
file_put_contents($primary . '/package.json', "{\"scripts\":{\"build\":\"echo build\"}}\n");
file_put_contents($primary . '/.gitignore', "target/\n");
$run('git add README.md && git commit -m init', $primary);
$run('git add Cargo.toml .gitignore && git commit -m tooling', $primary);
$run('git add Cargo.toml package.json .gitignore && git commit -m tooling', $primary);
$run('git branch -M main', $primary);
$run('git push -u origin main', $primary);

Expand All @@ -243,6 +244,8 @@ public function worktree_list( ?string $repo = null, ?string $state = null, arra
$make_branch('merged-live-remote', 'd'); // → simulate via PR-merged (stubbed → none)
$make_branch('unmerged-feature', 'e'); // still active
$make_branch('dirty-branch', 'f'); // will be dirty in worktree
$make_branch('artifact-only-dirty', 'f2'); // will be dirty only from build/
$make_branch('mixed-artifact-dirty', 'f3'); // will have build/ plus a source edit
$make_branch('external-branch', 'g'); // outside workspace, should never be removed

// Create worktrees at various paths:
Expand All @@ -257,12 +260,19 @@ public function worktree_list( ?string $repo = null, ?string $state = null, arra
$run(sprintf('git worktree add %s merged-live-remote', escapeshellarg($tmp . '/demo-unmanaged-merged')), $primary);
$run(sprintf('git worktree add %s unmerged-feature', escapeshellarg($tmp . '/demo@unmerged-feature')), $primary);
$run(sprintf('git worktree add %s dirty-branch', escapeshellarg($tmp . '/demo@dirty-branch')), $primary);
$run(sprintf('git worktree add %s artifact-only-dirty', escapeshellarg($tmp . '/demo@artifact-only-dirty')), $primary);
$run(sprintf('git worktree add %s mixed-artifact-dirty', escapeshellarg($tmp . '/demo@mixed-artifact-dirty')), $primary);
mkdir($tmp . '-external', 0755, true);
$run(sprintf('git worktree add %s external-branch', escapeshellarg($tmp . '-external/demo-external')), $primary);
$external_real = realpath($tmp . '-external/demo-external') ? realpath($tmp . '-external/demo-external') : $tmp . '-external/demo-external';

// Dirty the dirty worktree.
file_put_contents($tmp . '/demo@dirty-branch/scratch.txt', 'dirty');
mkdir($tmp . '/demo@artifact-only-dirty/build', 0755, true);
file_put_contents($tmp . '/demo@artifact-only-dirty/build/output.js', 'artifact');
mkdir($tmp . '/demo@mixed-artifact-dirty/build', 0755, true);
file_put_contents($tmp . '/demo@mixed-artifact-dirty/build/output.js', 'artifact');
file_put_contents($tmp . '/demo@mixed-artifact-dirty/README.md', "source edit\n");
mkdir($tmp . '/demo@merged-autodelete/target', 0755, true);
file_put_contents($tmp . '/demo@merged-autodelete/target/artifact.bin', str_repeat('x', 4096));

Expand Down Expand Up @@ -352,6 +362,8 @@ public function worktree_list( ?string $repo = null, ?string $state = null, arra
$run(sprintf('git --git-dir=%s update-ref -d refs/heads/merged-stale-plan', escapeshellarg($remote)));
$run(sprintf('git --git-dir=%s update-ref -d refs/heads/merged-recent', escapeshellarg($remote)));
$run(sprintf('git --git-dir=%s update-ref -d refs/heads/merged-unknown-age', escapeshellarg($remote)));
$run(sprintf('git --git-dir=%s update-ref -d refs/heads/artifact-only-dirty', escapeshellarg($remote)));
$run(sprintf('git --git-dir=%s update-ref -d refs/heads/mixed-artifact-dirty', escapeshellarg($remote)));

// -------------------------------------------------------------------------
// Dry-run assertions
Expand Down Expand Up @@ -394,6 +406,19 @@ public function worktree_list( ?string $repo = null, ?string $state = null, arra
$assert(true, str_contains($dirty_reason, 'dirty'), 'dirty skip reason mentions dirty');
$assert('dirty_worktree', $dirty_row['reason_code'] ?? '', 'dirty skip exposes stable reason code');

$artifact_dirty_skips = array_filter($plan['skipped'] ?? array(), fn( $s ) => ( $s['handle'] ?? '' ) === 'demo@artifact-only-dirty');
$assert(1, count($artifact_dirty_skips), 'artifact-only dirty worktree skipped with exactly one entry');
$artifact_dirty_row = array_values($artifact_dirty_skips)[0] ?? array();
$assert('artifact_only_dirty_worktree', $artifact_dirty_row['reason_code'] ?? '', 'artifact-only dirt exposes stable reason code');
$assert(array( 'build' ), $artifact_dirty_row['artifact_dirty_paths'] ?? array(), 'artifact-only dirty row reports dirty artifact path');
$assert('build', $artifact_dirty_row['artifacts'][0]['path'] ?? '', 'artifact-only dirty row reports matching artifact profile path');
$assert(true, str_contains($artifact_dirty_row['hint'] ?? '', 'cleanup-artifacts --dry-run'), 'artifact-only dirty row points to artifact cleanup lane');

$mixed_dirty_skips = array_filter($plan['skipped'] ?? array(), fn( $s ) => ( $s['handle'] ?? '' ) === 'demo@mixed-artifact-dirty');
$assert(1, count($mixed_dirty_skips), 'mixed source/artifact dirty worktree skipped with exactly one entry');
$mixed_dirty_row = array_values($mixed_dirty_skips)[0] ?? array();
$assert('dirty_worktree', $mixed_dirty_row['reason_code'] ?? '', 'mixed source/artifact dirt stays protected as generic dirty worktree');

// unmerged-feature should be skipped (no merge signal)
$unmerged = array_filter($plan['skipped'] ?? array(), fn( $s ) => ( $s['handle'] ?? '' ) === 'demo@unmerged-feature');
$assert(1, count($unmerged), 'unmerged worktree skipped with exactly one entry');
Expand All @@ -410,7 +435,10 @@ public function worktree_list( ?string $repo = null, ?string $state = null, arra
$assert(true, str_contains($external_row['hint'] ?? '', 'outside the DMC workspace'), 'external worktree includes remediation hint');

$assert(4, (int) ( $plan['summary']['would_remove'] ?? 0 ), 'summary counts cleanup candidates');
$assert(1, (int) ( $plan['summary']['skipped_by_reason']['dirty_worktree'] ?? 0 ), 'summary counts dirty skips by reason');
$assert(2, (int) ( $plan['summary']['skipped_by_reason']['dirty_worktree'] ?? 0 ), 'summary counts dirty skips by reason');
$assert(1, (int) ( $plan['summary']['skipped_by_reason']['artifact_only_dirty_worktree'] ?? 0 ), 'summary counts artifact-only dirty skips separately');
$assert(1, (int) ( $plan['summary']['cleanup_buckets']['artifact_only_dirty_worktree'] ?? 0 ), 'cleanup buckets expose artifact-only dirty count');
$assert(true, in_array('artifact_only_dirty_worktree', array_column($plan['summary']['skipped_next_commands'] ?? array(), 'reason_code'), true), 'summary exposes artifact-only dirty next command');
$assert(true, isset($plan['summary']['skipped_by_reason']['no_merge_signal']), 'summary includes no_merge_signal bucket');
$next_commands = (array) ( $plan['summary']['skipped_next_commands'] ?? array() );
$assert(true, in_array('no_merge_signal', array_column($next_commands, 'reason_code'), true), 'summary includes no_merge_signal next command');
Expand Down Expand Up @@ -444,12 +472,12 @@ public function worktree_list( ?string $repo = null, ?string $state = null, arra
$assert('needs_metadata_reconcile', $inventory_missing['reason_code'] ?? '', 'inventory-only missing metadata requires metadata reconciliation');
$inventory_buckets = (array) ( $inventory_plan['summary']['cleanup_buckets'] ?? array() );
$assert(2, (int) ( $inventory_buckets['safe_to_remove_now'] ?? 0 ), 'inventory cleanup bucket counts safe-to-remove candidates');
$assert(5, (int) ( $inventory_buckets['needs_reconciliation'] ?? 0 ), 'inventory cleanup bucket counts reconciliation candidates separately');
$assert(7, (int) ( $inventory_buckets['needs_reconciliation'] ?? 0 ), 'inventory cleanup bucket counts reconciliation candidates separately');
$assert(4, (int) ( $inventory_buckets['needs_full_review'] ?? 0 ), 'inventory cleanup bucket counts full-review rows separately');
$assert(0, (int) ( $inventory_buckets['blocked_by_dirty_or_unpushed'] ?? -1 ), 'inventory cleanup bucket keeps dirty/unpushed blockers separate');
$assert(2, (int) ( $inventory_buckets['explicit_cleanup_candidates'] ?? 0 ), 'inventory cleanup bucket counts explicit cleanup candidates');
$assert(1, (int) ( $inventory_buckets['lifecycle_reconciliation_candidates'] ?? 0 ), 'inventory cleanup bucket counts lifecycle reconciliation candidates');
$assert(4, (int) ( $inventory_buckets['metadata_reconciliation_candidates'] ?? 0 ), 'inventory cleanup bucket counts metadata reconciliation candidates');
$assert(6, (int) ( $inventory_buckets['metadata_reconciliation_candidates'] ?? 0 ), 'inventory cleanup bucket counts metadata reconciliation candidates');
$assert(4, (int) ( $inventory_buckets['active_no_signal'] ?? 0 ), 'inventory cleanup bucket counts active/no-signal rows');
$inventory_apply_hint = (array) ( $inventory_plan['summary']['bounded_cleanup_eligible_apply'] ?? array() );
$assert('studio wp datamachine-code workspace worktree bounded-cleanup-eligible-apply --limit=2', $inventory_plan['summary']['apply_command'] ?? '', 'inventory-only dry-run exposes bounded cleanup-eligible apply command');
Expand Down
Loading