MFlowCode
diff --git a/‎.github/pull_request_template.md‎
Lines changed: 2 additions & 0 deletions b/‎.github/pull_request_template.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.github/workflows/claude-code-review.yml‎
Lines changed: 125 additions & 15 deletions b/‎.github/workflows/claude-code-review.yml‎
Lines changed: 125 additions & 15 deletions
diff --git a/‎.github/workflows/claude.yml‎
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/claude.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎misc/runners/common/README.md‎
Lines changed: 25 additions & 0 deletions b/‎misc/runners/common/README.md‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎misc/runners/common/check-runners.sh‎
Lines changed: 44 additions & 0 deletions b/‎misc/runners/common/check-runners.sh‎
Lines changed: 44 additions & 0 deletions
diff --git a/‎misc/runners/common/create-runner.sh‎
Lines changed: 114 additions & 0 deletions b/‎misc/runners/common/create-runner.sh‎
Lines changed: 114 additions & 0 deletions
@@ -38,3 +38,5 @@ Reviews are not triggered automatically. To request a review, comment on the PR:
 - `@coderabbitai full review` — full review from scratch
 - `/review` — Qodo review
 - `/improve` — Qodo code suggestions
+- `@claude full review` — Claude full review (also triggers on PR open/reopen/ready)
+- Add label `claude-full-review` — Claude full review via label
@@ -2,10 +2,31 @@ name: Claude Code Review
 
 on:
   pull_request_target:
-    types: [opened, synchronize, ready_for_review, reopened]
+    types: [opened, synchronize, ready_for_review, reopened, labeled]
+  issue_comment:
+    types: [created]
 
 jobs:
   claude-review:
+    # Gate: run for PR lifecycle events (label events only when the label is
+    # "claude-full-review") and for comments on a PR that contain
+    # "@claude full review".
+    if: >
+      (
+        github.event_name == 'pull_request_target' &&
+        (
+          contains(fromJSON('["opened", "ready_for_review", "reopened", "synchronize"]'), github.event.action) ||
+          (github.event.action == 'labeled' && github.event.label.name == 'claude-full-review')
+        )
+      ) ||
+      (
+        github.event_name == 'issue_comment' &&
+        github.event.issue.pull_request != null &&
+        contains(github.event.comment.body, '@claude full review')
+      )
     runs-on: ubuntu-latest
     permissions:
       contents: read
@@ -20,12 +41,37 @@ jobs:
           sudo apt-get update
           sudo apt-get install -y unzip
 
-      # IMPORTANT: checkout BASE repo only (safe on forks)
+      # Base checkout only
       - name: Checkout base repo (safe)
         uses: actions/checkout@v4
         with:
           fetch-depth: 1
 
+      - name: Determine PR number and review mode
+        id: mode
+        shell: bash
+        # Event fields are handed to the script as environment variables rather
+        # than expanded inline, so event-supplied text is never parsed as shell
+        # code. The label name is not needed here: the job-level condition has
+        # already filtered label events, and they always map to a full review.
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          EVENT_ACTION: ${{ github.event.action }}
+          PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }}
+          ISSUE_NUMBER: ${{ github.event.issue.number }}
+        run: |
+          # Only new commits pushed to the PR (synchronize) are reviewed
+          # incrementally; every other trigger requests a full review.
+          REVIEW_MODE="full"
+          if [[ "$EVENT_NAME" == "pull_request_target" && "$EVENT_ACTION" == "synchronize" ]]; then
+            REVIEW_MODE="incremental"
+          fi
+
+          # pull_request_target carries the number on the pull_request object,
+          # issue_comment carries it on the issue object; the other one is empty.
+          PR_NUMBER="${PULL_REQUEST_NUMBER:-$ISSUE_NUMBER}"
+          if [[ -z "$PR_NUMBER" ]]; then
+            echo "::error::Could not determine the PR number for event '$EVENT_NAME'"
+            exit 1
+          fi
+
+          echo "pr_number=$PR_NUMBER" >> "$GITHUB_OUTPUT"
+          echo "review_mode=$REVIEW_MODE" >> "$GITHUB_OUTPUT"
+
       - name: Run Claude Code Review
         uses: anthropics/claude-code-action@v1
         with:
@@ -35,36 +81,100 @@ jobs:
           plugin_marketplaces: "https://github.com/anthropics/claude-code.git"
           plugins: "code-review@claude-code-plugins"
 
-          # NOTE: do NOT use --dangerouslyDisableSandbox (it can crash the CLI).
-          # This flag is for non-interactive CI runs (bypasses approval prompts).
           claude_args: >
             --dangerously-skip-permissions
             --max-turns 90
             --allowedTools
             "Bash"
 
           prompt: |
-            You are running in pull_request_target.
+            You are running in pull_request_target / issue_comment automation.
+
+            REVIEW MODE: ${{ steps.mode.outputs.review_mode }}
+            PR NUMBER: ${{ steps.mode.outputs.pr_number }}
+
             DO NOT read or inspect any checked-out PR/fork code. Review ONLY using GitHub API/gh commands.
 
             You may read local guidance ONLY from:
             - ./CLAUDE.md (root) if present
             - ./.claude/rules/*.md if present (max 10 files)
 
             Keep tool calls minimal and in this order:
+
+            Phase 1 — Local guidance (base branch only, safe):
             1) ls -1 .claude/rules 2>/dev/null || true
             2) cat CLAUDE.md 2>/dev/null || true
             3) find .claude/rules -maxdepth 1 -name "*.md" -print | head -n 10 | xargs -I{} cat "{}" 2>/dev/null || true
-            4) gh pr view ${{ github.event.pull_request.number }} --repo ${{ github.repository }} --json title,body,files,changedFiles,additions,deletions,headRefOid
-            5) gh pr diff ${{ github.event.pull_request.number }} --repo ${{ github.repository }}
-            6) Post ONE top-level PR comment titled "Claude Code Review", then STOP.
-
-            Output format:
-            - Head SHA
-            - Files changed count + list up to 10 file paths
-            - Summary (3–6 bullets, minimal)
-            - Findings with file + line numbers when possible
-            - If no issues: 0–3 improvement opportunities (only if confident)
+
+            Phase 2 — PR metadata and diff:
+            4) gh pr view ${{ steps.mode.outputs.pr_number }} --repo ${{ github.repository }} --json title,body,files,changedFiles,additions,deletions,headRefOid,comments
+            5) gh pr diff ${{ steps.mode.outputs.pr_number }} --repo ${{ github.repository }}
+
+            Phase 3 — Full file context (read via GitHub API, NOT local checkout):
+            After reviewing the diff, fetch full contents of changed files to understand
+            surrounding context. This is critical for catching issues the diff alone hides
+            (e.g., duplicate code, broken callers, missing cleanup, variable shadowing).
+
+            Use this pattern to fetch file contents at the PR head SHA:
+              gh api repos/${{ github.repository }}/contents/{path}?ref={head_sha} --jq '.content' | base64 -d
+
+            Rules for Phase 3:
+            - Get the head SHA from step 4's headRefOid field.
+            - Fetch up to 15 changed files (skip files >500 lines or binary files).
+            - Prioritize: source code (.fpp, .f90, .py, .yml) over docs/config.
+            - For Fortran/Fypp files: also fetch files that the changed file imports
+              (look for "use m_<name>" or "#:include" in the fetched content) if they
+              seem relevant to the review. Limit to 5 additional related files.
+            - Do NOT fetch files that are unchanged and unrelated to the diff.
+            - If a file fetch fails (404, too large), skip it and continue.
+
+            Review policy:
+            - FULL mode:
+              - Review the current PR normally.
+              - Post or update ONE top-level PR comment titled "Claude Code Review".
+            - INCREMENTAL mode:
+              - Find the most recent prior Claude review comment on this PR.
+              - Look for a hidden marker in the form:
+                <!-- claude-review: reviewed_sha=<sha>; mode=<mode> -->
+              - Compare the prior reviewed SHA to the current head SHA.
+              - Review ONLY for newly introduced issues since the previous Claude-reviewed SHA.
+              - DO NOT repeat earlier findings.
+              - DO NOT restate the full PR summary.
+              - If there are no new high-confidence findings, DO NOT post a new comment. STOP.
+              - If there are new findings, update the existing Claude review comment if possible; otherwise post one new top-level comment.
+
+            Re-review policy:
+            - A full review is explicitly requested only when:
+              - the workflow was triggered by PR label "claude-full-review", or
+              - the workflow was triggered by an issue comment containing "@claude full review"
+
+            Output format for FULL mode:
+            Claude Code Review
+
+            Head SHA: <sha>
+
+            Files changed:
+            - <count>
+            - <up to 10 paths>
+
+            Summary:
+            - <3-6 minimal bullets>
+
+            Findings:
+            - <file + line numbers when possible>
+            - <minimal, high-confidence only>
+
+            Output format for INCREMENTAL mode:
+            Claude Code Review
+
+            Incremental review from: <previous_sha>
+            Head SHA: <current_sha>
+
+            New findings since last Claude review:
+            - <only genuinely new issues, file + line numbers when possible>
+
+            When posting a comment, include this hidden marker at the end:
+            <!-- claude-review: reviewed_sha=<current_head_sha>; mode=${{ steps.mode.outputs.review_mode }} -->
 
             If posting is blocked, write the full review to the GitHub Actions job summary instead, then STOP.
 
 
@@ -31,5 +31,6 @@ jobs:
         uses: anthropics/claude-code-action@v1
         with:
           claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+          claude_args: "--dangerously-skip-permissions"
           additional_permissions: |
             actions: read
@@ -0,0 +1,25 @@
+# Common Runner Management Scripts
+
+Site-agnostic scripts shared between the Frontier and Phoenix runner setups.
+All shared logic lives here; site directories contain only site-specific files
+(`config.sh` and scripts unique to that cluster).
+
+Scripts are invoked via the dispatcher at `misc/runners/runner.sh`:
+```bash
+bash misc/runners/runner.sh <site> <command> [args...]
+```
+
+## Scripts
+
+| Script | Purpose |
+|---|---|
+| `runner-lib.sh` | Shared library: GitHub API helpers, EXE-based process discovery, parallel node sweep, start/stop primitives. Sourced by site `config.sh` files. |
+| `check-runners.sh` | Per-node health check: Runner.Listener processes with name, idle/BUSY, slurm PATH, RSS. Optional cgroup memory footer. |
+| `list-runners.sh` | Full table: GitHub API status × parallel node sweep. Shows slurm status, flags stale `runner.node`. |
+| `rebalance-runners.sh` | Compute optimal distribution and move runners across nodes. Handles offline runners. Writes `runner.node`. Dry run by default. |
+| `restart-runner.sh` | Stop and restart one runner on a given node. Verifies slurm in PATH. Writes `runner.node`. |
+| `restart-all.sh` | Restart all runners in place. Skips busy unless `FORCE=1`. Dry run by default. |
+| `move-runner.sh` | Move a runner to a different login node by name. Stops on current node, starts on target. Writes `runner.node`. |
+| `stop-runner.sh` | Stop a runner process and remove its GitHub registration. |
+| `rerun-failed.sh` | Rerun failed GitHub Actions workflows on open non-draft PRs and master. Dry run by default. |
+| `create-runner.sh` | Download, register, and start a new runner. Requires `runner_install_dir()` from site config; `TARBALL_CACHE_DIR` is optional (when set, the runner tarball is cached there and reused). Usage: `runner.sh <site> create-runner <name> <node> [install-dir]` |
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+# Check runner health across all login nodes.
+#
+# Sourced by site wrappers (frontier/check-runners.sh, phoenix/check-runners.sh)
+# after config.sh is loaded. Shows Runner.Listener processes per node with
+# name, busy/idle status, slurm availability, and RSS memory.
+# If CGROUP_LIMIT > 0, also shows per-node total memory vs the cgroup limit.
+#
+# Reads from the sourcing environment: NODES (array of hosts to visit),
+# SSH_OPTS (extra ssh options) and, optionally, CGROUP_LIMIT (in MB) and a
+# sync_runner_nodes function, which is called first when it is defined.
+#
+# Usage: bash check-runners.sh
+set -euo pipefail
+
+# sync_runner_nodes is optional; skip it quietly when it is not defined.
+if declare -f sync_runner_nodes > /dev/null 2>&1; then
+    echo "==> Syncing runner node locations..."
+    sync_runner_nodes
+fi
+
+for node in "${NODES[@]}"; do
+    echo "=== $node ==="
+    # SSH_OPTS is intentionally unquoted so it splits into separate ssh options.
+    # The remote script is single-quoted, so every expansion in it happens on
+    # the remote host. Two details matter for correct reporting:
+    #   * grep -c prints 0 itself when nothing matches (and exits 1), so only its
+    #     exit status is neutralised instead of echoing a second 0.
+    #   * the script must finish with status 0 when runners were listed;
+    #     otherwise ssh returns non-zero and the node is reported as unreachable
+    #     even though it answered.
+    ssh $SSH_OPTS "$node" '
+        found=0
+        for p in $(ps aux | grep Runner.Listener | grep -v grep | awk "{print \$2}"); do
+            found=1
+            exe=$(readlink -f /proc/$p/exe 2>/dev/null || echo "???")
+            dir=$(dirname "$(dirname "$exe")" 2>/dev/null || echo "???")
+            name=$(basename "$dir")
+            worker=$(ps aux | grep "Runner.Worker" | grep "$dir" | grep -v grep | awk "{print \$2}" | head -1)
+            [ -n "$worker" ] && status="BUSY" || status="idle"
+            rss=$(ps -p $p -o rss= 2>/dev/null | awk "{printf \"%.0f\", \$1/1024}" || echo "?")
+            slurm=$(tr "\0" "\n" < /proc/$p/environ 2>/dev/null | grep -c "^PATH=.*slurm" || true)
+            [ "${slurm:-0}" -gt 0 ] && slurm_ok="ok" || slurm_ok="MISSING"
+            printf "  %-30s %5s  slurm=%-7s  %s MB\n" "$name" "$status" "$slurm_ok" "$rss"
+        done
+        if [ "$found" -eq 0 ]; then echo "  (no runners)"; fi
+    ' 2>/dev/null || echo "  (unreachable)"
+
+    if [ "${CGROUP_LIMIT:-0}" -gt 0 ]; then
+        # Sum the RSS (KB) of all processes owned by the remote user, in MB.
+        rss=$(ssh $SSH_OPTS "$node" \
+            "ps -u \$(whoami) -o rss= 2>/dev/null | awk '{sum+=\$1} END {printf \"%.0f\", sum/1024}'" \
+            2>/dev/null || echo "?")
+        # Anything non-numeric (for example an unreachable node) counts as 0 so
+        # the arithmetic below cannot fail.
+        [[ "$rss" =~ ^[0-9]+$ ]] || rss=0
+        echo "  --- Total: ${rss} MB / ${CGROUP_LIMIT} MB ($(( CGROUP_LIMIT - rss )) MB free) ---"
+    fi
+    echo ""
+done
@@ -0,0 +1,114 @@
+#!/usr/bin/env bash
+# Create, register, and start a GitHub Actions runner.
+#
+# Sourced by misc/runners/runner.sh after config is loaded.
+# Config must define runner_install_dir() and may set TARBALL_CACHE_DIR.
+#
+# runner_install_dir <name> [override-dir]
+#   Returns the directory where the runner should be installed.
+#   If override-dir is given it is used directly; otherwise the site
+#   computes the path (e.g. SHARED_DIR/<name> on Frontier, or an
+#   auto-numbered actions-runner-N/ directory on Phoenix).
+#
+# TARBALL_CACHE_DIR
+#   If non-empty, the runner tarball is cached here and reused across
+#   installs (useful on Frontier where shared Lustre is visible from all
+#   login nodes). If empty or unset, a fresh download is made for each
+#   runner and the temporary file is removed after extraction.
+#
+# Usage: runner.sh <site> create-runner <name> <node> [install-dir]
+#   name         Runner name (e.g. frontier-23, phoenix-11)
+#   node         Login node to start the runner on
+#   install-dir  Optional: override the computed installation directory
+set -euo pipefail
+
+RUNNER_NAME="${1:?Usage: create-runner <name> <node> [install-dir]}"
+TARGET_NODE="${2:?Usage: create-runner <name> <node> [install-dir]}"
+INSTALL_DIR_OVERRIDE="${3:-}"
+
+RUNNER_DIR=$(runner_install_dir "$RUNNER_NAME" "$INSTALL_DIR_OVERRIDE")
+RUNNER_VERSION="${RUNNER_VERSION:-$(gh_latest_runner_version 2>/dev/null || echo "2.332.0")}"
+TARBALL="actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz"
+TARBALL_URL="https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/${TARBALL}"
+
+echo "=== Creating runner ==="
+echo "  Name:      $RUNNER_NAME"
+echo "  Node:      $TARGET_NODE"
+echo "  Directory: $RUNNER_DIR"
+echo "  Org:       $ORG"
+echo "  Group:     $RUNNER_GROUP"
+echo "  Label:     $RUNNER_LABEL"
+echo "  Version:   $RUNNER_VERSION"
+echo ""
+
+if [ -d "$RUNNER_DIR" ]; then
+    echo "ERROR: Directory already exists: $RUNNER_DIR" >&2
+    exit 1
+fi
+
+# --- Download tarball ---
+# Cached mode keeps one tarball per version in TARBALL_CACHE_DIR; uncached mode
+# downloads into the new runner directory and deletes the file after extraction.
+# A failed download is cleaned up so it cannot block the next attempt.
+if [ -n "${TARBALL_CACHE_DIR:-}" ]; then
+    if [ ! -f "$TARBALL_CACHE_DIR/$TARBALL" ]; then
+        echo "==> Downloading runner v${RUNNER_VERSION} to cache..."
+        mkdir -p "$TARBALL_CACHE_DIR"
+        # Download under a PID-suffixed name and rename afterwards, so a partial
+        # file is never mistaken for a complete cached tarball.
+        tmp="$TARBALL_CACHE_DIR/$TARBALL.tmp.$$"
+        if ! curl -fsSL "$TARBALL_URL" -o "$tmp"; then
+            rm -f "$tmp"
+            echo "ERROR: Failed to download $TARBALL_URL" >&2
+            exit 1
+        fi
+        mv "$tmp" "$TARBALL_CACHE_DIR/$TARBALL"
+    fi
+    tarball_path="$TARBALL_CACHE_DIR/$TARBALL"
+else
+    echo "==> Downloading runner v${RUNNER_VERSION}..."
+    mkdir -p "$RUNNER_DIR"
+    tarball_path="$RUNNER_DIR/runner-download.tmp.$$"
+    if ! curl -fsSL "$TARBALL_URL" -o "$tarball_path"; then
+        # Remove what this run created; rmdir only succeeds on an empty
+        # directory, so nothing else can be deleted by accident.
+        rm -f "$tarball_path"
+        rmdir "$RUNNER_DIR" 2>/dev/null || true
+        echo "ERROR: Failed to download $TARBALL_URL" >&2
+        exit 1
+    fi
+fi
+
+# --- Extract ---
+mkdir -p "$RUNNER_DIR"
+echo "==> Extracting into $RUNNER_DIR..."
+if ! tar xzf "$tarball_path" -C "$RUNNER_DIR"; then
+    # The temporary download is useless now; a cached tarball is left in place.
+    if [ -z "${TARBALL_CACHE_DIR:-}" ]; then
+        rm -f "$tarball_path"
+    fi
+    echo "ERROR: Extraction of $tarball_path failed; remove $RUNNER_DIR before retrying" >&2
+    exit 1
+fi
+if [ -z "${TARBALL_CACHE_DIR:-}" ]; then
+    rm -f "$tarball_path"
+fi
+
+if [ ! -f "$RUNNER_DIR/run.sh" ]; then
+    echo "ERROR: Extraction failed — run.sh not found in $RUNNER_DIR" >&2
+    exit 1
+fi
+
+# --- Register ---
+# Obtain a registration token and configure the freshly extracted runner
+# against the organization, without interactive prompts.
+echo "==> Fetching registration token..."
+# Under `set -e` a non-zero status from the helper would abort the script right
+# here, before the guidance below is printed; treat it like an empty token.
+token=$(gh_registration_token) || token=""
+if [ -z "$token" ]; then
+    echo "ERROR: Failed to get registration token." >&2
+    echo "       Run: gh auth refresh -h github.com -s admin:org" >&2
+    exit 1
+fi
+
+echo "==> Configuring runner..."
+"$RUNNER_DIR/config.sh" \
+    --url "https://github.com/$ORG" \
+    --token "$token" \
+    --name "$RUNNER_NAME" \
+    --runnergroup "$RUNNER_GROUP" \
+    --labels "$RUNNER_LABEL" \
+    --work "_work" \
+    --unattended \
+    --replace
+echo "==> Configured."
+
+# --- Start ---
+# Start the runner on the target node, record the node in runner.node, then
+# report whether the running process has slurm in its PATH.
+echo "==> Starting on $TARGET_NODE..."
+if start_runner "$TARGET_NODE" "$RUNNER_DIR"; then
+    echo "$TARGET_NODE" > "$RUNNER_DIR/runner.node"
+    # The runner is already up at this point, so a failing or empty PID lookup
+    # must not abort the script (set -e) or feed an empty PID to has_slurm.
+    pids=$(find_pids "$TARGET_NODE" "$RUNNER_DIR") || pids=""
+    pid=${pids%% *}
+    if [ -z "$pid" ]; then
+        echo "==> WARNING: $RUNNER_NAME started on $TARGET_NODE but its PID was not found; slurm check skipped"
+    elif has_slurm "$TARGET_NODE" "$pid"; then
+        echo "==> OK: $RUNNER_NAME running on $TARGET_NODE (PID $pid, slurm in PATH)"
+    else
+        echo "==> WARNING: $RUNNER_NAME running on $TARGET_NODE (PID $pid) but slurm MISSING from PATH"
+    fi
+else
+    echo "ERROR: $RUNNER_NAME did not start on $TARGET_NODE" >&2
+    exit 1
+fi
+
+echo ""
+echo "==> Log: $RUNNER_DIR/runner.log"