Skip to content

Commit 07a7a06

Browse files
authored
Merge branch 'master' into MTHINC
2 parents 85acf5a + d8c479f commit 07a7a06

25 files changed

Lines changed: 1641 additions & 270 deletions

.github/pull_request_template.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,5 @@ Reviews are not triggered automatically. To request a review, comment on the PR:
3838
- `@coderabbitai full review` — full review from scratch
3939
- `/review` — Qodo review
4040
- `/improve` — Qodo code suggestions
41+
- `@claude full review` — Claude full review (also triggers on PR open/reopen/ready)
42+
- Add label `claude-full-review` — Claude full review via label

.github/workflows/claude-code-review.yml

Lines changed: 125 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,31 @@ name: Claude Code Review
22

33
on:
44
pull_request_target:
5-
types: [opened, synchronize, ready_for_review, reopened]
5+
types: [opened, synchronize, ready_for_review, reopened, labeled]
6+
issue_comment:
7+
types: [created]
68

79
jobs:
810
claude-review:
11+
if: >
12+
(
13+
github.event_name == 'pull_request_target' &&
14+
(
15+
github.event.action == 'opened' ||
16+
github.event.action == 'ready_for_review' ||
17+
github.event.action == 'reopened' ||
18+
github.event.action == 'synchronize' ||
19+
(
20+
github.event.action == 'labeled' &&
21+
github.event.label.name == 'claude-full-review'
22+
)
23+
)
24+
) ||
25+
(
26+
github.event_name == 'issue_comment' &&
27+
github.event.issue.pull_request != null &&
28+
contains(github.event.comment.body, '@claude full review')
29+
)
930
runs-on: ubuntu-latest
1031
permissions:
1132
contents: read
@@ -20,12 +41,37 @@ jobs:
2041
sudo apt-get update
2142
sudo apt-get install -y unzip
2243
23-
# IMPORTANT: checkout BASE repo only (safe on forks)
44+
# Base checkout only
2445
- name: Checkout base repo (safe)
2546
uses: actions/checkout@v4
2647
with:
2748
fetch-depth: 1
2849

50+
- name: Determine PR number and review mode
  id: mode
  shell: bash
  # Event fields are handed to the script through environment variables
  # instead of being templated into the script text. This workflow runs on
  # pull_request_target / issue_comment, so values such as label names must
  # never be parsed as shell source.
  env:
    EVENT_NAME: ${{ github.event_name }}
    EVENT_ACTION: ${{ github.event.action }}
    PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }}
    ISSUE_NUMBER: ${{ github.event.issue.number }}
  run: |
    # Outputs:
    #   pr_number   - PR to review (empty if the event carries none)
    #   review_mode - "incremental" for pushes to an existing PR, else "full"
    PR_NUMBER=""
    REVIEW_MODE="full"

    case "$EVENT_NAME" in
      pull_request_target)
        PR_NUMBER="$PULL_REQUEST_NUMBER"
        # opened / reopened / ready_for_review / labeled all request a full
        # review (the job-level `if` already filters on the label name);
        # only new commits on the PR get an incremental one.
        if [[ "$EVENT_ACTION" == "synchronize" ]]; then
          REVIEW_MODE="incremental"
        fi
        ;;
      issue_comment)
        # For PR comments the issue number is the PR number.
        PR_NUMBER="$ISSUE_NUMBER"
        ;;
    esac

    echo "pr_number=$PR_NUMBER" >> "$GITHUB_OUTPUT"
    echo "review_mode=$REVIEW_MODE" >> "$GITHUB_OUTPUT"
74+
2975
- name: Run Claude Code Review
3076
uses: anthropics/claude-code-action@v1
3177
with:
@@ -35,36 +81,100 @@ jobs:
3581
plugin_marketplaces: "https://github.com/anthropics/claude-code.git"
3682
plugins: "code-review@claude-code-plugins"
3783

38-
# NOTE: do NOT use --dangerouslyDisableSandbox (it can crash the CLI).
39-
# This flag is for non-interactive CI runs (bypasses approval prompts).
4084
claude_args: >
4185
--dangerously-skip-permissions
4286
--max-turns 90
4387
--allowedTools
4488
"Bash"
4589
4690
prompt: |
47-
You are running in pull_request_target.
91+
You are running in pull_request_target / issue_comment automation.
92+
93+
REVIEW MODE: ${{ steps.mode.outputs.review_mode }}
94+
PR NUMBER: ${{ steps.mode.outputs.pr_number }}
95+
4896
DO NOT read or inspect any checked-out PR/fork code. Review ONLY using GitHub API/gh commands.
4997
5098
You may read local guidance ONLY from:
5199
- ./CLAUDE.md (root) if present
52100
- ./.claude/rules/*.md if present (max 10 files)
53101
54102
Keep tool calls minimal and in this order:
103+
104+
Phase 1 — Local guidance (base branch only, safe):
55105
1) ls -1 .claude/rules 2>/dev/null || true
56106
2) cat CLAUDE.md 2>/dev/null || true
57107
3) find .claude/rules -maxdepth 1 -name "*.md" -print | head -n 10 | xargs -I{} cat "{}" 2>/dev/null || true
58-
4) gh pr view ${{ github.event.pull_request.number }} --repo ${{ github.repository }} --json title,body,files,changedFiles,additions,deletions,headRefOid
59-
5) gh pr diff ${{ github.event.pull_request.number }} --repo ${{ github.repository }}
60-
6) Post ONE top-level PR comment titled "Claude Code Review", then STOP.
61-
62-
Output format:
63-
- Head SHA
64-
- Files changed count + list up to 10 file paths
65-
- Summary (3–6 bullets, minimal)
66-
- Findings with file + line numbers when possible
67-
- If no issues: 0–3 improvement opportunities (only if confident)
108+
109+
Phase 2 — PR metadata and diff:
110+
4) gh pr view ${{ steps.mode.outputs.pr_number }} --repo ${{ github.repository }} --json title,body,files,changedFiles,additions,deletions,headRefOid,comments
111+
5) gh pr diff ${{ steps.mode.outputs.pr_number }} --repo ${{ github.repository }}
112+
113+
Phase 3 — Full file context (read via GitHub API, NOT local checkout):
114+
After reviewing the diff, fetch full contents of changed files to understand
115+
surrounding context. This is critical for catching issues the diff alone hides
116+
(e.g., duplicate code, broken callers, missing cleanup, variable shadowing).
117+
118+
Use this pattern to fetch file contents at the PR head SHA:
119+
gh api repos/${{ github.repository }}/contents/{path}?ref={head_sha} --jq '.content' | base64 -d
120+
121+
Rules for Phase 3:
122+
- Get the head SHA from step 4's headRefOid field.
123+
- Fetch up to 15 changed files (skip files >500 lines or binary files).
124+
- Prioritize: source code (.fpp, .f90, .py, .yml) over docs/config.
125+
- For Fortran/Fypp files: also fetch files that the changed file imports
126+
(look for "use m_<name>" or "#:include" in the fetched content) if they
127+
seem relevant to the review. Limit to 5 additional related files.
128+
- Do NOT fetch files that are unchanged and unrelated to the diff.
129+
- If a file fetch fails (404, too large), skip it and continue.
130+
131+
Review policy:
132+
- FULL mode:
133+
- Review the current PR normally.
134+
- Post or update ONE top-level PR comment titled "Claude Code Review".
135+
- INCREMENTAL mode:
136+
- Find the most recent prior Claude review comment on this PR.
137+
- Look for a hidden marker in the form:
138+
<!-- claude-review: reviewed_sha=<sha>; mode=<mode> -->
139+
- Compare the prior reviewed SHA to the current head SHA.
140+
- Review ONLY for newly introduced issues since the previous Claude-reviewed SHA.
141+
- DO NOT repeat earlier findings.
142+
- DO NOT restate the full PR summary.
143+
- If there are no new high-confidence findings, DO NOT post a new comment. STOP.
144+
- If there are new findings, update the existing Claude review comment if possible; otherwise post one new top-level comment.
145+
146+
Re-review policy:
147+
- A full review is explicitly requested only when:
148+
- the workflow was triggered by PR label "claude-full-review", or
149+
- the workflow was triggered by an issue comment containing "@claude full review"
150+
151+
Output format for FULL mode:
152+
Claude Code Review
153+
154+
Head SHA: <sha>
155+
156+
Files changed:
157+
- <count>
158+
- <up to 10 paths>
159+
160+
Summary:
161+
- <3-6 minimal bullets>
162+
163+
Findings:
164+
- <file + line numbers when possible>
165+
- <minimal, high-confidence only>
166+
167+
Output format for INCREMENTAL mode:
168+
Claude Code Review
169+
170+
Incremental review from: <previous_sha>
171+
Head SHA: <current_sha>
172+
173+
New findings since last Claude review:
174+
- <only genuinely new issues, file + line numbers when possible>
175+
176+
When posting a comment, include this hidden marker at the end:
177+
<!-- claude-review: reviewed_sha=<current_head_sha>; mode=${{ steps.mode.outputs.review_mode }} -->
68178
69179
If posting is blocked, write the full review to the GitHub Actions job summary instead, then STOP.
70180

.github/workflows/claude.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,5 +31,6 @@ jobs:
3131
uses: anthropics/claude-code-action@v1
3232
with:
3333
claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
34+
claude_args: "--dangerously-skip-permissions"
3435
additional_permissions: |
3536
actions: read

misc/runners/common/README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Common Runner Management Scripts
2+
3+
Site-agnostic scripts shared between the Frontier and Phoenix runner setups.
4+
All shared logic lives here; site directories contain only site-specific files
5+
(`config.sh` and scripts unique to that cluster).
6+
7+
Scripts are invoked via the dispatcher at `misc/runners/runner.sh`:
8+
```bash
9+
bash misc/runners/runner.sh <site> <command> [args...]
10+
```
11+
12+
## Scripts
13+
14+
| Script | Purpose |
15+
|---|---|
16+
| `runner-lib.sh` | Shared library: GitHub API helpers, EXE-based process discovery, parallel node sweep, start/stop primitives. Sourced by site `config.sh` files. |
17+
| `check-runners.sh` | Per-node health check: Runner.Listener processes with name, idle/BUSY, slurm PATH, RSS. Optional cgroup memory footer. |
18+
| `list-runners.sh` | Full table: GitHub API status × parallel node sweep. Shows slurm status, flags stale `runner.node`. |
19+
| `rebalance-runners.sh` | Compute optimal distribution and move runners across nodes. Handles offline runners. Writes `runner.node`. Dry run by default. |
20+
| `restart-runner.sh` | Stop and restart one runner on a given node. Verifies slurm in PATH. Writes `runner.node`. |
21+
| `restart-all.sh` | Restart all runners in place. Skips busy unless `FORCE=1`. Dry run by default. |
22+
| `move-runner.sh` | Move a runner to a different login node by name. Stops on current node, starts on target. Writes `runner.node`. |
23+
| `stop-runner.sh` | Stop a runner process and remove its GitHub registration. |
24+
| `rerun-failed.sh` | Rerun failed GitHub Actions workflows on open non-draft PRs and master. Dry run by default. |
25+
| `create-runner.sh` | Download, register, and start a new runner. Requires `runner_install_dir()` and `TARBALL_CACHE_DIR` from site config. Usage: `create-runner <name> <node> [install-dir]` |
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/usr/bin/env bash
# Check runner health across all login nodes.
#
# Sourced by site wrappers (frontier/check-runners.sh, phoenix/check-runners.sh)
# after config.sh is loaded. Shows Runner.Listener processes per node with
# name, busy/idle status, slurm availability, and RSS memory.
# If CGROUP_LIMIT > 0, also shows per-node total memory vs the cgroup limit.
#
# Reads from the environment set up by the caller:
#   NODES         array of login nodes to inspect
#   SSH_OPTS      extra ssh options (intentionally word-split)
#   CGROUP_LIMIT  optional per-node memory limit in MB (0/unset disables footer)
#
# Usage: bash check-runners.sh
set -euo pipefail

# sync_runner_nodes is optional; call it only when the caller defined it.
if declare -f sync_runner_nodes > /dev/null 2>&1; then
    echo "==> Syncing runner node locations..."
    sync_runner_nodes
fi

for node in "${NODES[@]}"; do
    echo "=== $node ==="
    # The remote script below is single-quoted: it must not contain a literal
    # single quote, and it must finish with exit status 0 on success, because
    # any non-zero ssh status is reported as "(unreachable)".
    # shellcheck disable=SC2086  # SSH_OPTS is a list of options, split on purpose
    ssh $SSH_OPTS "$node" '
        found=0
        for p in $(ps aux | grep Runner.Listener | grep -v grep | awk "{print \$2}"); do
            found=1
            exe=$(readlink -f /proc/$p/exe 2>/dev/null || echo "???")
            dir=$(dirname "$(dirname "$exe")" 2>/dev/null || echo "???")
            name=$(basename "$dir")
            worker=$(ps aux | grep "Runner.Worker" | grep "$dir" | grep -v grep | awk "{print \$2}" | head -1)
            if [ -n "$worker" ]; then status="BUSY"; else status="idle"; fi
            # awk exits 0 even when ps printed nothing, so detect the empty result explicitly.
            rss=$(ps -p $p -o rss= 2>/dev/null | awk "{printf \"%.0f\", \$1/1024}")
            [ -n "$rss" ] || rss="?"
            # grep -c always prints the count but exits 1 on zero matches;
            # "|| true" keeps the single count instead of appending a second 0.
            slurm=$(tr "\0" "\n" < /proc/$p/environ 2>/dev/null | grep -c "^PATH=.*slurm" || true)
            if [ "${slurm:-0}" -gt 0 ]; then slurm_ok="ok"; else slurm_ok="MISSING"; fi
            printf " %-30s %5s slurm=%-7s %s MB\n" "$name" "$status" "$slurm_ok" "$rss"
        done
        # Use if/fi rather than "test && echo": a false test as the last command
        # would make the remote shell exit 1 even though runners were listed.
        if [ "$found" -eq 0 ]; then
            echo " (no runners)"
        fi
    ' 2>/dev/null || echo " (unreachable)"

    if [ "${CGROUP_LIMIT:-0}" -gt 0 ]; then
        # Sum the RSS (KB) of every process owned by the remote user, in MB.
        # shellcheck disable=SC2086  # SSH_OPTS is a list of options, split on purpose
        rss=$(ssh $SSH_OPTS "$node" \
            "ps -u \$(whoami) -o rss= 2>/dev/null | awk '{sum+=\$1} END {printf \"%.0f\", sum/1024}'" \
            2>/dev/null || echo "?")
        # Anything non-numeric (e.g. "?" from a failed ssh) is treated as 0 so
        # the arithmetic below cannot fail.
        [[ "$rss" =~ ^[0-9]+$ ]] || rss=0
        echo " --- Total: ${rss} MB / ${CGROUP_LIMIT} MB ($(( CGROUP_LIMIT - rss )) MB free) ---"
    fi
    echo ""
done
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
#!/usr/bin/env bash
# Create, register, and start a GitHub Actions runner.
#
# Sourced by misc/runners/runner.sh after config is loaded.
# Config must define runner_install_dir() and may set TARBALL_CACHE_DIR.
#
#   runner_install_dir <name> [override-dir]
#     Returns the directory where the runner should be installed.
#     If override-dir is given it is used directly; otherwise the site
#     computes the path (e.g. SHARED_DIR/<name> on Frontier, or an
#     auto-numbered actions-runner-N/ directory on Phoenix).
#
#   TARBALL_CACHE_DIR
#     If non-empty, the runner tarball is cached here and reused across
#     installs (useful on Frontier where shared Lustre is visible from all
#     login nodes). If empty or unset, a fresh download is made for each
#     runner and the temporary file is removed after extraction.
#
# Also expects these names from the already-sourced config/library:
#   ORG, RUNNER_GROUP, RUNNER_LABEL, gh_latest_runner_version,
#   gh_registration_token, start_runner, find_pids, has_slurm.
#
# Usage: runner.sh <site> create-runner <name> <node> [install-dir]
#   name         Runner name (e.g. frontier-23, phoenix-11)
#   node         Login node to start the runner on
#   install-dir  Optional: override the computed installation directory
set -euo pipefail

RUNNER_NAME="${1:?Usage: create-runner <name> <node> [install-dir]}"
TARGET_NODE="${2:?Usage: create-runner <name> <node> [install-dir]}"
INSTALL_DIR_OVERRIDE="${3:-}"

RUNNER_DIR=$(runner_install_dir "$RUNNER_NAME" "$INSTALL_DIR_OVERRIDE")
# An explicit RUNNER_VERSION wins; otherwise ask the helper, falling back to a
# pinned version if the lookup fails.
RUNNER_VERSION="${RUNNER_VERSION:-$(gh_latest_runner_version 2>/dev/null || echo "2.332.0")}"
TARBALL="actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz"
TARBALL_URL="https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/${TARBALL}"

echo "=== Creating runner ==="
echo " Name: $RUNNER_NAME"
echo " Node: $TARGET_NODE"
echo " Directory: $RUNNER_DIR"
echo " Org: $ORG"
echo " Group: $RUNNER_GROUP"
echo " Label: $RUNNER_LABEL"
echo " Version: $RUNNER_VERSION"
echo ""

# Refuse to touch an existing installation.
if [ -d "$RUNNER_DIR" ]; then
    echo "ERROR: Directory already exists: $RUNNER_DIR" >&2
    exit 1
fi

# --- Download tarball ---
if [ -n "${TARBALL_CACHE_DIR:-}" ]; then
    if [ ! -f "$TARBALL_CACHE_DIR/$TARBALL" ]; then
        echo "==> Downloading runner v${RUNNER_VERSION} to cache..."
        # Download under a temporary name and rename afterwards so a partial
        # file never appears under the final cache name.
        tmp="$TARBALL_CACHE_DIR/$TARBALL.tmp.$$"
        if ! curl -fsSL "$TARBALL_URL" -o "$tmp"; then
            rm -f -- "$tmp"
            echo "ERROR: Failed to download $TARBALL_URL" >&2
            exit 1
        fi
        mv "$tmp" "$TARBALL_CACHE_DIR/$TARBALL"
    fi
    tarball_path="$TARBALL_CACHE_DIR/$TARBALL"
else
    echo "==> Downloading runner v${RUNNER_VERSION}..."
    mkdir -p "$RUNNER_DIR"
    tarball_path="$RUNNER_DIR/runner-download.tmp.$$"
    if ! curl -fsSL "$TARBALL_URL" -o "$tarball_path"; then
        # Undo the mkdir above; otherwise a retry would be rejected by the
        # "Directory already exists" guard. rmdir only removes it if empty.
        rm -f -- "$tarball_path"
        rmdir -- "$RUNNER_DIR" 2>/dev/null || true
        echo "ERROR: Failed to download $TARBALL_URL" >&2
        exit 1
    fi
fi

# --- Extract ---
mkdir -p "$RUNNER_DIR"
echo "==> Extracting into $RUNNER_DIR..."
tar xzf "$tarball_path" -C "$RUNNER_DIR"
# Only the per-install download is temporary; a cached tarball is kept.
if [ -z "${TARBALL_CACHE_DIR:-}" ]; then
    rm -f -- "$tarball_path"
fi

if [ ! -f "$RUNNER_DIR/run.sh" ]; then
    echo "ERROR: Extraction failed — run.sh not found in $RUNNER_DIR" >&2
    exit 1
fi

# --- Register ---
echo "==> Fetching registration token..."
# Under `set -e` a failing helper would abort here silently; capture the
# failure so the diagnostic below is actually printed.
token=$(gh_registration_token) || token=""
if [ -z "$token" ]; then
    echo "ERROR: Failed to get registration token." >&2
    echo " Run: gh auth refresh -h github.com -s admin:org" >&2
    exit 1
fi

echo "==> Configuring runner..."
"$RUNNER_DIR/config.sh" \
    --url "https://github.com/$ORG" \
    --token "$token" \
    --name "$RUNNER_NAME" \
    --runnergroup "$RUNNER_GROUP" \
    --labels "$RUNNER_LABEL" \
    --work "_work" \
    --unattended \
    --replace
echo "==> Configured."

# --- Start ---
echo "==> Starting on $TARGET_NODE..."
if start_runner "$TARGET_NODE" "$RUNNER_DIR"; then
    # Record which node hosts this runner.
    echo "$TARGET_NODE" > "$RUNNER_DIR/runner.node"
    pids=$(find_pids "$TARGET_NODE" "$RUNNER_DIR")
    # Keep only the first PID of a space-separated list.
    pid=${pids%% *}
    if has_slurm "$TARGET_NODE" "$pid"; then
        echo "==> OK: $RUNNER_NAME running on $TARGET_NODE (PID $pid, slurm in PATH)"
    else
        echo "==> WARNING: $RUNNER_NAME running on $TARGET_NODE (PID $pid) but slurm MISSING from PATH"
    fi
else
    echo "ERROR: $RUNNER_NAME did not start on $TARGET_NODE" >&2
    exit 1
fi

echo ""
echo "==> Log: $RUNNER_DIR/runner.log"

0 commit comments

Comments
 (0)