Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,29 @@ jobs:
- name: Start podman socket
run: systemctl --user start podman.socket

- name: Build operator image
run: make buildimg

- name: Start bink cluster
run: make start-bink

- name: Deploy to bink cluster
run: make deploy-bink

- name: Gather deploy logs
if: failure()
run: make gather-bink

- name: Run e2e tests
run: make buildimg deploy-bink e2e V=1
run: make e2e V=1

- name: Upload logs
if: always()
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
name: e2e-logs
path: _output/logs/
if-no-files-found: ignore

- name: Push to GHCR
if: github.event_name == 'push'
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
_output/
bin/
*.out
kubeconfig-*
26 changes: 21 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ CONTAINER_TOOL ?= podman
# To use a separate dev cluster: make deploy-bink BINK_CLUSTER_NAME=dev
BINK_CLUSTER_NAME ?= e2e
KUBECONFIG_BINK ?= ./kubeconfig-$(BINK_CLUSTER_NAME)
ARTIFACTS ?= $(abspath _output/logs)
# YEAR defines the year value used for substituting the YEAR placeholder in the boilerplate header.
YEAR ?= $(shell date +%Y)

Expand Down Expand Up @@ -58,8 +59,10 @@ e2e: ## Run e2e tests (requires: make deploy-bink). V=1 for verbose. RUN=<regex>
# actually gives us streaming output (otherwise, it spawns a subprocess for
# each package, even though we just have one here--but I really like streaming
# output...).
rm -rf $(ARTIFACTS)
cd test/e2e && KUBECONFIG=$(abspath $(KUBECONFIG_BINK)) BINK_CLUSTER_NAME=$(BINK_CLUSTER_NAME) \
$(if $(BINK_NODE_IMAGE),BINK_NODE_IMAGE=$(BINK_NODE_IMAGE)) \
ARTIFACTS=$(ARTIFACTS) \
go test -timeout 10m -count=1 $(if $(V),-v) $(if $(RUN),-run $(RUN)) .

##@ Build
Expand Down Expand Up @@ -108,18 +111,31 @@ undeploy: kustomize ## Undeploy controller from the K8s cluster specified in ~/.
# Note the :latest tag here: this makes the pull policy be Always.
IMG_BINK ?= registry.cluster.local:5000/bootc-operator-e2e:latest

.PHONY: deploy-bink
deploy-bink: kustomize ## Deploy to a bink cluster (idempotent, requires: buildimg).
.PHONY: start-bink
start-bink: ## Start a bink cluster (idempotent).
bink registry start
podman push --tls-verify=false $(IMG) localhost:5000/bootc-operator-e2e:latest
bink cluster list 2>&1 | grep -qw $(BINK_CLUSTER_NAME) || \
bink cluster start --cluster-name $(BINK_CLUSTER_NAME) --node-name controller --api-port 0 --expose $(KUBECONFIG_BINK) \
$(if $(BINK_NODE_IMAGE),--node-image $(BINK_NODE_IMAGE))
kubectl --kubeconfig $(KUBECONFIG_BINK) wait --for=condition=Ready node/controller --timeout=5m
$(MAKE) deploy KUBECONFIG=$(abspath $(KUBECONFIG_BINK)) IMG=$(IMG_BINK)
kubectl --kubeconfig $(KUBECONFIG_BINK) -n bootc-operator rollout restart deployment/bootc-operator-controller-manager

# deploy-bink: push the operator image to the local registry and roll it out
# on the bink cluster. start-bink and kustomize run first as prerequisites.
#
# The recipe records whether the controller-manager Deployment already exists
# *before* invoking the `deploy` target, because that is what distinguishes a
# re-deploy (restart needed) from a fresh deploy (no restart needed).
.PHONY: deploy-bink
deploy-bink: start-bink kustomize ## Deploy to a bink cluster (requires: buildimg).
podman push --tls-verify=false $(IMG) localhost:5000/bootc-operator-e2e:latest
# On re-deploy, restart the rollout to force a re-pull of the :latest tag.
# On fresh deploy, skip the restart -- the pod is already pulling the correct image.
@existed=$$(kubectl --kubeconfig $(KUBECONFIG_BINK) -n bootc-operator get deploy bootc-operator-controller-manager -o name 2>/dev/null || true) && \
$(MAKE) deploy KUBECONFIG=$(abspath $(KUBECONFIG_BINK)) IMG=$(IMG_BINK) && \
if [ -n "$$existed" ]; then \
kubectl --kubeconfig $(KUBECONFIG_BINK) -n bootc-operator rollout restart deployment/bootc-operator-controller-manager; \
fi
# Block until the controller-manager rollout completes (3 minute limit).
kubectl --kubeconfig $(KUBECONFIG_BINK) -n bootc-operator rollout status deployment/bootc-operator-controller-manager --timeout=3m

# gather-bink: run hack/gather-logs.sh against the bink cluster.
# KUBECONFIG and BINK_CLUSTER_NAME are passed through the environment (the
# script requires both); output goes to $(ARTIFACTS)/gather-bink and the
# node named "controller" is included in the per-node collection.
.PHONY: gather-bink
gather-bink: ## Gather diagnostic logs from the bink cluster.
KUBECONFIG=$(abspath $(KUBECONFIG_BINK)) BINK_CLUSTER_NAME=$(BINK_CLUSTER_NAME) \
hack/gather-logs.sh $(ARTIFACTS)/gather-bink controller

.PHONY: teardown-bink
teardown-bink: ## Tear down the bink cluster.
bink cluster stop --remove-data --cluster-name $(BINK_CLUSTER_NAME)
Expand Down
55 changes: 55 additions & 0 deletions hack/gather-logs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/bin/bash
# Collect diagnostic output from a bink cluster into a directory.
#
# Usage: hack/gather-logs.sh <output-dir> [node-names...]
#
# KUBECONFIG and BINK_CLUSTER_NAME must be provided via the environment.
# Every gathered command gets its own file under <output-dir>, and a
# command that fails does not abort the run.

set -euo pipefail

# The output directory is mandatory; node names are optional.
(( $# >= 1 )) || {
    printf 'Usage: %s <output-dir> [node-names...]\n' "$0" >&2
    exit 1
}

# Fail early with a clear message when the required environment is missing.
: "${KUBECONFIG:?must be set}"
: "${BINK_CLUSTER_NAME:?must be set}"

# First argument is the destination; whatever remains is the node list.
output_dir="$1"
shift
nodes=("$@")

mkdir -p "${output_dir}"

printf '%s\n' "Gathering logs to ${output_dir}..."

# run <filename> <command> [args...]
#
# Announces <filename>, then executes the command with stdout and stderr
# captured into ${output_dir}/<filename>. A failing command is tolerated:
# the function always returns success so the gathering continues.
run() {
    local out_name="$1"
    shift
    printf ' %s\n' "${out_name}"
    if ! "$@" > "${output_dir}/${out_name}" 2>&1; then
        : # best-effort: a failed command must not stop the script
    fi
}

# Cluster-wide commands: state of the operator namespace and of the
# operator's custom resources.
run "k-get-pods.txt" kubectl get pods -n bootc-operator -o wide
run "k-describe-pods.txt" kubectl describe pods -n bootc-operator
run "k-get-deployment.yaml" kubectl get deployment -n bootc-operator -o yaml
run "k-describe-bootcnodepools.txt" kubectl describe bootcnodepools
run "k-describe-bootcnodes.txt" kubectl describe bootcnodes
run "k-get-events.txt" kubectl get events -n bootc-operator --sort-by=.lastTimestamp

# Pod logs: for every pod in the operator namespace, capture the logs of all
# containers, plus a second capture with --previous. If listing the pods
# fails, the loop simply has nothing to iterate over.
for pod in $(kubectl get pods -n bootc-operator -o jsonpath='{.items[*].metadata.name}' 2>/dev/null); do
    run "k-logs-${pod}.log" kubectl logs -n bootc-operator "${pod}" --all-containers
    run "k-logs-${pod}-previous.log" kubectl logs -n bootc-operator "${pod}" --all-containers --previous
done

# Per-node commands. Node names are optional, and a plain "${nodes[@]}" on an
# empty array is treated as an unbound variable under `set -u` by bash older
# than 4.4. The ${nodes[@]+...} guard expands to nothing in that case and to
# the individually quoted node names otherwise.
for node in ${nodes[@]+"${nodes[@]}"}; do
    run "k-describe-node-${node}.txt" kubectl describe node "${node}"
    run "journal-${node}.txt" bink node ssh "${node}" --cluster-name "${BINK_CLUSTER_NAME}" -- journalctl --no-pager
done

echo "Done."
30 changes: 29 additions & 1 deletion test/e2e/e2eutil/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"time"
Expand Down Expand Up @@ -168,8 +169,11 @@ func (e *Env) TestLabels() map[string]string {
return map[string]string{LabelE2ETest: e.testID}
}

// cleanup deletes test-scoped resources and bink nodes.
// cleanup gathers diagnostic logs, then deletes test-scoped resources
// and bink nodes.
func (e *Env) cleanup(t *testing.T) {
e.gatherLogs(t)

ctx := context.Background()
t.Logf("Removing pools with label %s=%s...", LabelE2ETest, e.testID)
if err := e.Client.DeleteAllOf(ctx, &bootcv1alpha1.BootcNodePool{}, client.MatchingLabels(e.TestLabels())); err != nil {
Expand All @@ -183,6 +187,30 @@ func (e *Env) cleanup(t *testing.T) {
}
}

// gatherLogs invokes hack/gather-logs.sh so that diagnostics for this test
// are written to $ARTIFACTS/<testID>/. It does nothing when ARTIFACTS is
// empty or unset, and a failure of the script is only logged as a warning.
func (e *Env) gatherLogs(t *testing.T) {
	root := os.Getenv("ARTIFACTS")
	if len(root) == 0 {
		return
	}

	dest := filepath.Join(root, e.testID)
	t.Logf("Gathering logs to %s...", dest)

	// Tests execute with test/e2e/ as the working directory, hence the
	// path that climbs back up to the repo root to reach the script.
	argv := append([]string{"../../hack/gather-logs.sh", dest}, e.nodes...)

	gather := exec.Command("bash", argv...)
	gather.Stdout, gather.Stderr = os.Stdout, os.Stderr
	if err := gather.Run(); err != nil {
		t.Logf("WARNING: gather-logs.sh failed: %v", err)
	}
}

// sanitizeTestName lowercases a test name for use in k8s object names.
// Panics if the result exceeds 63 characters (k8s label value limit).
func sanitizeTestName(name string) string {
Expand Down