diff --git a/go.mod b/go.mod index 6953213..5f34b70 100644 --- a/go.mod +++ b/go.mod @@ -104,7 +104,7 @@ require ( k8s.io/component-base v0.35.0 // indirect k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect - k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 // indirect + k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.33.0 // indirect sigs.k8s.io/gateway-api v1.4.0 // indirect sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect diff --git a/internal/controller/onboarding_controller.go b/internal/controller/onboarding_controller.go index e92a198..faac00a 100644 --- a/internal/controller/onboarding_controller.go +++ b/internal/controller/onboarding_controller.go @@ -31,6 +31,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/clock" ctrl "sigs.k8s.io/controller-runtime" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" logger "sigs.k8s.io/controller-runtime/pkg/log" @@ -51,6 +52,7 @@ var errRequeue = errors.New("requeue requested") const ( defaultWaitTime = 1 * time.Minute + smokeTestTimeout = 5 * time.Minute testProjectName = "test" testDomainName = "cc3test" testImageName = "cirros-kvm" @@ -63,6 +65,7 @@ type OnboardingController struct { k8sclient.Client Scheme *runtime.Scheme TestFlavorID string + Clock clock.Clock computeClient *gophercloud.ServiceClient testComputeClient *gophercloud.ServiceClient testImageClient *gophercloud.ServiceClient @@ -286,6 +289,23 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis } if !strings.Contains(consoleOutput, server.Name) { + if !server.LaunchedAt.IsZero() && r.Clock.Now().After(server.LaunchedAt.Add(smokeTestTimeout)) { + base := hv.DeepCopy() + meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ + Type: kvmv1.ConditionTypeOnboarding, + Status: metav1.ConditionTrue, + Reason: kvmv1.ConditionReasonTesting, + Message: fmt.Sprintf("timeout waiting for console output since %v", server.LaunchedAt), + }) + if err := r.patchStatus(ctx, hv, base); err != nil { + return ctrl.Result{}, err + } + if err = servers.Delete(ctx, r.testComputeClient, server.ID).ExtractErr(); err != nil { + if !gophercloud.ResponseCodeIs(err, http.StatusNotFound) { + return ctrl.Result{}, fmt.Errorf("failed to delete timed out test instance %v: %w", server.ID, err) + } + } + } return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } @@ -613,6 +633,10 @@ func (r *OnboardingController) SetupWithManager(mgr ctrl.Manager) error { } r.testNetworkClient.ResourceBase = fmt.Sprintf("%vv2.0/", r.testNetworkClient.Endpoint) + if r.Clock == nil { + r.Clock = clock.RealClock{} + } + return ctrl.NewControllerManagedBy(mgr). Named(OnboardingControllerName). For(&kvmv1.Hypervisor{}). diff --git a/internal/controller/onboarding_controller_test.go b/internal/controller/onboarding_controller_test.go index 2a74376..962d66e 100644 --- a/internal/controller/onboarding_controller_test.go +++ b/internal/controller/onboarding_controller_test.go @@ -21,6 +21,7 @@ import ( "fmt" "net/http" "os" + "time" "github.com/gophercloud/gophercloud/v2/testhelper" "github.com/gophercloud/gophercloud/v2/testhelper/client" @@ -30,6 +31,8 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/clock" + clocktesting "k8s.io/utils/clock/testing" ctrl "sigs.k8s.io/controller-runtime" kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" @@ -221,6 +224,7 @@ var _ = Describe("Onboarding Controller", func() { Client: k8sClient, Scheme: k8sClient.Scheme(), TestFlavorID: "1", + Clock: clock.RealClock{}, computeClient: client.ServiceClient(fakeServer), testComputeClient: client.ServiceClient(fakeServer), testImageClient: client.ServiceClient(fakeServer), @@ -340,6 +344,12 @@ var _ = Describe("Onboarding Controller", func() { }) Context("running tests after initial setup", func() { + var ( + serverActionHandler func(http.ResponseWriter, *http.Request) + serverDeleteHandler func(http.ResponseWriter, *http.Request) + serverDetailHandler func(http.ResponseWriter, *http.Request) + ) + BeforeEach(func(ctx SpecContext) { hv := &kvmv1.Hypervisor{} Expect(k8sClient.Get(ctx, namespacedName, hv)).To(Succeed()) @@ -371,11 +381,14 @@ var _ = Describe("Onboarding Controller", func() { Expect(err).NotTo(HaveOccurred()) }) + serverDetailHandler = func(w http.ResponseWriter, _ *http.Request) { + _, err := fmt.Fprint(w, emptyServersBody) + Expect(err).NotTo(HaveOccurred()) + } fakeServer.Mux.HandleFunc("GET /servers/detail", func(w http.ResponseWriter, r *http.Request) { w.Header().Add("Content-Type", "application/json") w.WriteHeader(http.StatusOK) - _, err := fmt.Fprint(w, emptyServersBody) - Expect(err).NotTo(HaveOccurred()) + serverDetailHandler(w, r) }) fakeServer.Mux.HandleFunc("POST /instance-ha", func(w http.ResponseWriter, r *http.Request) { @@ -406,15 +419,21 @@ var _ = Describe("Onboarding Controller", func() { Expect(err).NotTo(HaveOccurred()) }) - fakeServer.Mux.HandleFunc("POST /servers/server-id/action", func(w http.ResponseWriter, r *http.Request) { + serverActionHandler = func(w http.ResponseWriter, _ *http.Request) { w.Header().Add("Content-Type", "application/json") w.WriteHeader(http.StatusOK) _, err := fmt.Fprintf(w, `{"output": "FAKE CONSOLE OUTPUT\nANOTHER\nLAST LINE\nohooc--%v-%v\n"}`, hv.Name, hv.UID) Expect(err).NotTo(HaveOccurred()) - + } + fakeServer.Mux.HandleFunc("POST /servers/server-id/action", func(w http.ResponseWriter, r *http.Request) { + serverActionHandler(w, r) }) - fakeServer.Mux.HandleFunc("DELETE /servers/server-id", func(w http.ResponseWriter, r *http.Request) { + + serverDeleteHandler = func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusAccepted) + } + fakeServer.Mux.HandleFunc("DELETE /servers/server-id", func(w http.ResponseWriter, r *http.Request) { + serverDeleteHandler(w, r) }) }) @@ -571,5 +590,80 @@ var _ = Describe("Onboarding Controller", func() { }) }) + When("smoke test times out waiting for console output", func() { + var serverDeletedCalled bool + + BeforeEach(func(ctx SpecContext) { + By("Overriding HV status to Testing state") + hv := &kvmv1.Hypervisor{} + Expect(k8sClient.Get(ctx, namespacedName, hv)).To(Succeed()) + meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ + Type: kvmv1.ConditionTypeOnboarding, + Status: metav1.ConditionTrue, + Reason: kvmv1.ConditionReasonTesting, + }) + Expect(k8sClient.Status().Update(ctx, hv)).To(Succeed()) + + // Construct the server name the controller will look for. + serverName := fmt.Sprintf("%s-%s-%s", testPrefixName, hypervisorName, string(hv.UID)) + detailCallCount := 0 + + // On the first reconcile GET /servers/detail returns empty so the controller + // creates the server via POST (no launched_at → timeout cannot fire yet). + // On the second reconcile GET /servers/detail returns the ACTIVE server with a + // stale launched_at so createOrGetTestServer takes the "already-running" path and + // smokeTest fires the timeout. + serverDetailHandler = func(w http.ResponseWriter, _ *http.Request) { + if detailCallCount == 0 { + detailCallCount++ + _, err := fmt.Fprint(w, emptyServersBody) + Expect(err).NotTo(HaveOccurred()) + } else { + _, err := fmt.Fprintf(w, + `{"servers": [{"id": "server-id", "name": %q, "status": "ACTIVE", "OS-SRV-USG:launched_at": "2025-01-01T12:00:00.000000"}], "servers_links": []}`, + serverName) + Expect(err).NotTo(HaveOccurred()) + } + } + + // Set the clock to 6 minutes after the launched_at above (past the 5-minute deadline). + onboardingReconciler.Clock = clocktesting.NewFakeClock(time.Date(2025, 1, 1, 12, 6, 0, 0, time.UTC)) + serverDeletedCalled = false + + // Console output that does NOT contain the server name, so the timeout path is exercised. + serverActionHandler = func(w http.ResponseWriter, _ *http.Request) { + w.Header().Add("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, err := fmt.Fprint(w, `{"output": "some unrelated console output\n"}`) + Expect(err).NotTo(HaveOccurred()) + } + serverDeleteHandler = func(w http.ResponseWriter, _ *http.Request) { + serverDeletedCalled = true + w.WriteHeader(http.StatusAccepted) + } + }) + + It("should delete the stalled server and record a timeout in the status", func(ctx SpecContext) { + By("First reconcile: controller creates the ACTIVE server; launched_at is absent so timeout does not fire yet") + _, err := onboardingReconciler.Reconcile(ctx, reconcileReq) + Expect(err).NotTo(HaveOccurred()) + + By("Second reconcile: GET /servers/detail returns the stale server; timeout fires and the server is deleted") + _, err = onboardingReconciler.Reconcile(ctx, reconcileReq) + Expect(err).NotTo(HaveOccurred()) + + By("Verifying the timed-out server was deleted") + Expect(serverDeletedCalled).To(BeTrue()) + + By("Verifying the onboarding condition message indicates a timeout") + hv := &kvmv1.Hypervisor{} + Expect(k8sClient.Get(ctx, namespacedName, hv)).To(Succeed()) + onboardingCond := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) + Expect(onboardingCond).NotTo(BeNil()) + Expect(onboardingCond.Reason).To(Equal(kvmv1.ConditionReasonTesting)) + Expect(onboardingCond.Message).To(ContainSubstring("timeout")) + }) + }) + }) })