Skip to content

Commit 1645179

Browse files
Test for AutoNodeSizing
1 parent 3cec87c commit 1645179

3 files changed

Lines changed: 332 additions & 8 deletions

File tree

test/extended/node/node_e2e/node.go

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,21 @@ import (
66

77
g "github.com/onsi/ginkgo/v2"
88
o "github.com/onsi/gomega"
9+
nodeutils "github.com/openshift/origin/test/extended/node"
910
exutil "github.com/openshift/origin/test/extended/util"
1011
"k8s.io/apimachinery/pkg/util/wait"
1112
e2e "k8s.io/kubernetes/test/e2e/framework"
1213
)
1314

1415
var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager", func() {
1516
var (
16-
oc = exutil.NewCLIWithoutNamespace("node").AsAdmin()
17+
oc = exutil.NewCLIWithoutNamespace("node")
1718
)
1819

1920
//author: asahay@redhat.com
2021
g.It("[OTP] validate KUBELET_LOG_LEVEL", func() {
2122
var kubeservice string
22-
var kublet string
23+
var kubelet string
2324
var err error
2425

2526
isMicroShift, err := exutil.IsMicroShiftCluster(oc.AdminKubeClient())
@@ -33,28 +34,28 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
3334
g.By("Polling to check kubelet log level on ready nodes")
3435
waitErr := wait.Poll(10*time.Second, 1*time.Minute, func() (bool, error) {
3536
g.By("Getting all node names in the cluster")
36-
nodeName, nodeErr := oc.AsAdmin().WithoutNamespace().Run("get").Args("nodes", "-o=jsonpath={.items[*].metadata.name}").Output()
37+
nodeName, nodeErr := oc.AsAdmin().Run("get").Args("nodes", "-o=jsonpath={.items[*].metadata.name}").Output()
3738
o.Expect(nodeErr).NotTo(o.HaveOccurred())
3839
e2e.Logf("\nNode Names are %v", nodeName)
3940
nodes := strings.Fields(nodeName)
4041

4142
for _, node := range nodes {
4243
g.By("Checking if node " + node + " is Ready")
43-
nodeStatus, statusErr := oc.AsAdmin().WithoutNamespace().Run("get").Args("nodes", node, "-o=jsonpath={.status.conditions[?(@.type=='Ready')].status}").Output()
44+
nodeStatus, statusErr := oc.AsAdmin().Run("get").Args("nodes", node, "-o=jsonpath={.status.conditions[?(@.type=='Ready')].status}").Output()
4445
o.Expect(statusErr).NotTo(o.HaveOccurred())
4546
e2e.Logf("\nNode %s Status is %s\n", node, nodeStatus)
4647

4748
if nodeStatus == "True" {
4849
g.By("Checking KUBELET_LOG_LEVEL in kubelet.service on node " + node)
49-
kubeservice, err = oc.AsAdmin().WithoutNamespace().Run("debug").Args("node/"+node, "-ndefault", "--", "chroot", "/host", "/bin/bash", "-c", "systemctl show kubelet.service | grep KUBELET_LOG_LEVEL").Output()
50+
kubeservice, err = nodeutils.ExecOnNodeWithChroot(oc, node, "/bin/bash", "-c", "systemctl show kubelet.service | grep KUBELET_LOG_LEVEL")
5051
o.Expect(err).NotTo(o.HaveOccurred())
5152

5253
g.By("Checking kubelet process for --v=2 flag on node " + node)
53-
kublet, err = oc.AsAdmin().WithoutNamespace().Run("debug").Args("node/"+node, "-ndefault", "--", "chroot", "/host", "/bin/bash", "-c", "ps aux | grep kubelet").Output()
54+
kubelet, err = nodeutils.ExecOnNodeWithChroot(oc, node, "/bin/bash", "-c", "ps aux | grep [k]ubelet")
5455
o.Expect(err).NotTo(o.HaveOccurred())
5556

5657
g.By("Verifying KUBELET_LOG_LEVEL is set and kubelet is running with --v=2")
57-
if strings.Contains(string(kubeservice), "KUBELET_LOG_LEVEL") && strings.Contains(string(kublet), "--v=2") {
58+
if strings.Contains(kubeservice, "KUBELET_LOG_LEVEL") && strings.Contains(kubelet, "--v=2") {
5859
e2e.Logf("KUBELET_LOG_LEVEL is 2.\n")
5960
return true, nil
6061
} else {
@@ -70,7 +71,7 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
7071

7172
if waitErr != nil {
7273
e2e.Logf("Kubelet Log level is:\n %v\n", kubeservice)
73-
e2e.Logf("Running Process of kubelet are:\n %v\n", kublet)
74+
e2e.Logf("Running Process of kubelet are:\n %v\n", kubelet)
7475
}
7576
o.Expect(waitErr).NotTo(o.HaveOccurred(), "KUBELET_LOG_LEVEL is not expected, timed out")
7677
})

test/extended/node/node_sizing.go

Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
package node
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"strings"
7+
"time"
8+
9+
g "github.com/onsi/ginkgo/v2"
10+
o "github.com/onsi/gomega"
11+
corev1 "k8s.io/api/core/v1"
12+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
13+
"k8s.io/kubernetes/test/e2e/framework"
14+
15+
mcfgv1 "github.com/openshift/api/machineconfiguration/v1"
16+
machineconfigclient "github.com/openshift/client-go/machineconfiguration/clientset/versioned"
17+
exutil "github.com/openshift/origin/test/extended/util"
18+
)
19+
20+
// This suite exercises the MCO-managed automatic node sizing feature: the MCO
// writes /etc/node-sizing-enabled.env on each node, and a KubeletConfig's
// autoSizingReserved field toggles the NODE_SIZING_ENABLED value recorded in
// that file. The test is disruptive: it moves one worker node into a throwaway
// MachineConfigPool and triggers MachineConfig rollouts on it.
var _ = g.Describe("[Suite:openshift/disruptive-longrunning][sig-node][Disruptive] Node sizing", func() {
	defer g.GinkgoRecover()

	oc := exutil.NewCLIWithoutNamespace("node-sizing")

	g.It("should have NODE_SIZING_ENABLED=true by default and NODE_SIZING_ENABLED=false when KubeletConfig with autoSizingReserved=false is applied", func(ctx context.Context) {
		// Skip on MicroShift since it doesn't have the Machine Config Operator
		isMicroshift, err := exutil.IsMicroShiftCluster(oc.AdminKubeClient())
		o.Expect(err).NotTo(o.HaveOccurred())
		if isMicroshift {
			g.Skip("Not supported on MicroShift")
		}

		mcClient, err := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig())
		o.Expect(err).NotTo(o.HaveOccurred(), "Error creating MCO client")

		// Names for the throwaway pool, the node-role label that feeds its node
		// selector, and the KubeletConfig created later in the test.
		testMCPName := "node-sizing-test"
		testNodeMCPLabel := fmt.Sprintf("node-role.kubernetes.io/%s", testMCPName)
		kubeletConfigName := "auto-sizing-enabled"

		// Verify the default state (NODE_SIZING_ENABLED=true); the assertion
		// below checks for "true", matching the It title.
		// This feature is added in OCP 4.21
		g.By("Getting a worker node to test")
		nodes, err := oc.AdminKubeClient().CoreV1().Nodes().List(ctx, metav1.ListOptions{
			LabelSelector: "node-role.kubernetes.io/worker",
		})
		o.Expect(err).NotTo(o.HaveOccurred(), "Should be able to list worker nodes")
		o.Expect(len(nodes.Items)).To(o.BeNumerically(">", 0), "Should have at least one worker node")

		// Select first worker node and label it for our custom MCP
		// This approach is taken so that all the nodes do not restart at the same time for the test
		nodeName := nodes.Items[0].Name
		framework.Logf("Testing on node: %s", nodeName)

		g.By(fmt.Sprintf("Labeling node %s with %s", nodeName, testNodeMCPLabel))
		node, err := oc.AdminKubeClient().CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
		o.Expect(err).NotTo(o.HaveOccurred(), "Should be able to get node")

		if node.Labels == nil {
			node.Labels = make(map[string]string)
		}
		node.Labels[testNodeMCPLabel] = ""
		_, err = oc.AdminKubeClient().CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{})
		o.Expect(err).NotTo(o.HaveOccurred(), "Should be able to label node")

		// Create custom MCP that renders worker MachineConfigs and selects only
		// the node labeled above.
		g.By(fmt.Sprintf("Creating custom MachineConfigPool %s", testMCPName))
		testMCP := &mcfgv1.MachineConfigPool{
			TypeMeta: metav1.TypeMeta{
				APIVersion: "machineconfiguration.openshift.io/v1",
				Kind:       "MachineConfigPool",
			},
			ObjectMeta: metav1.ObjectMeta{
				Name: testMCPName,
				Labels: map[string]string{
					"machineconfiguration.openshift.io/pool": testMCPName,
				},
			},
			Spec: mcfgv1.MachineConfigPoolSpec{
				// Include MachineConfigs with role "worker" or the custom role.
				MachineConfigSelector: &metav1.LabelSelector{
					MatchExpressions: []metav1.LabelSelectorRequirement{
						{
							Key:      "machineconfiguration.openshift.io/role",
							Operator: metav1.LabelSelectorOpIn,
							Values:   []string{"worker", testMCPName},
						},
					},
				},
				NodeSelector: &metav1.LabelSelector{
					MatchLabels: map[string]string{
						testNodeMCPLabel: "",
					},
				},
			},
		}

		_, err = mcClient.MachineconfigurationV1().MachineConfigPools().Create(ctx, testMCP, metav1.CreateOptions{})
		o.Expect(err).NotTo(o.HaveOccurred(), "Should be able to create custom MachineConfigPool")

		// cleanupMCP deletes the throwaway pool; failures are only logged so
		// teardown keeps going.
		cleanupMCP := func() {
			g.By("Cleaning up custom MachineConfigPool")
			deleteErr := mcClient.MachineconfigurationV1().MachineConfigPools().Delete(ctx, testMCPName, metav1.DeleteOptions{})
			if deleteErr != nil {
				framework.Logf("Failed to delete MachineConfigPool %s: %v", testMCPName, deleteErr)
			}
		}

		// cleanupNodeLabel removes the custom role label, then waits for the MCO
		// to move the node back to a rendered worker config.
		cleanupNodeLabel := func() {
			g.By(fmt.Sprintf("Removing node label %s from node %s", testNodeMCPLabel, nodeName))
			node, getErr := oc.AdminKubeClient().CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
			if getErr != nil {
				framework.Logf("Failed to get node for cleanup: %v", getErr)
				return
			}

			delete(node.Labels, testNodeMCPLabel)
			_, updateErr := oc.AdminKubeClient().CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{})
			if updateErr != nil {
				framework.Logf("Failed to remove label from node %s: %v", nodeName, updateErr)
				return
			}

			// Wait for the node to transition back to the worker pool configuration
			g.By(fmt.Sprintf("Waiting for node %s to transition back to worker pool", nodeName))
			o.Eventually(func() bool {
				currentNode, err := oc.AdminKubeClient().CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
				if err != nil {
					framework.Logf("Error getting node: %v", err)
					return false
				}
				// The MCO daemon records the applied and target rendered-config
				// names in these node annotations.
				currentConfig := currentNode.Annotations["machineconfiguration.openshift.io/currentConfig"]
				desiredConfig := currentNode.Annotations["machineconfiguration.openshift.io/desiredConfig"]

				// Check if the node is using a worker config (not node-sizing-test config)
				isWorkerConfig := currentConfig != "" && !strings.Contains(currentConfig, testMCPName) && currentConfig == desiredConfig
				if isWorkerConfig {
					framework.Logf("Node %s successfully transitioned to worker config: %s", nodeName, currentConfig)
				} else {
					framework.Logf("Node %s still transitioning: current=%s, desired=%s", nodeName, currentConfig, desiredConfig)
				}
				return isWorkerConfig
			}, 10*time.Minute, 10*time.Second).Should(o.BeTrue(), fmt.Sprintf("Node %s should transition back to worker pool", nodeName))
		}

		// Register DeferCleanup so cleanup happens even on test failure
		// DeferCleanup runs in LIFO order: MCP deleted last (registered first)
		g.DeferCleanup(cleanupMCP)
		g.DeferCleanup(cleanupNodeLabel)

		g.By("Waiting for custom MachineConfigPool to be ready")
		err = waitForMCPToBeReady(ctx, mcClient, testMCPName, 5*time.Minute)
		o.Expect(err).NotTo(o.HaveOccurred(), "Custom MachineConfigPool should become ready")

		// Default behavior: auto node sizing is enabled.
		verifyNodeSizingEnabledFile(oc, nodeName, "true")

		// Now apply KubeletConfig and verify NODE_SIZING_ENABLED=false

		g.By("Creating KubeletConfig with autoSizingReserved=false")
		autoSizingReserved := false
		kubeletConfig := &mcfgv1.KubeletConfig{
			TypeMeta: metav1.TypeMeta{
				APIVersion: "machineconfiguration.openshift.io/v1",
				Kind:       "KubeletConfig",
			},
			ObjectMeta: metav1.ObjectMeta{
				Name: kubeletConfigName,
			},
			Spec: mcfgv1.KubeletConfigSpec{
				AutoSizingReserved: &autoSizingReserved,
				// Target only the custom pool created above.
				MachineConfigPoolSelector: &metav1.LabelSelector{
					MatchLabels: map[string]string{
						"machineconfiguration.openshift.io/pool": testMCPName,
					},
				},
			},
		}

		_, err = mcClient.MachineconfigurationV1().KubeletConfigs().Create(ctx, kubeletConfig, metav1.CreateOptions{})
		o.Expect(err).NotTo(o.HaveOccurred(), "Should be able to create KubeletConfig")

		// cleanupKubeletConfig deletes the KubeletConfig and waits for the pool
		// to settle on the restored configuration.
		cleanupKubeletConfig := func() {
			g.By("Cleaning up KubeletConfig")
			deleteErr := mcClient.MachineconfigurationV1().KubeletConfigs().Delete(ctx, kubeletConfigName, metav1.DeleteOptions{})
			if deleteErr != nil {
				framework.Logf("Failed to delete KubeletConfig %s: %v", kubeletConfigName, deleteErr)
			}

			// Wait for custom MCP to be ready after cleanup
			g.By("Waiting for custom MCP to be ready after KubeletConfig deletion")
			waitErr := waitForMCPToBeReady(ctx, mcClient, testMCPName, 10*time.Minute)
			if waitErr != nil {
				framework.Logf("Failed to wait for custom MCP to be ready: %v", waitErr)
			}
		}
		g.DeferCleanup(cleanupKubeletConfig)

		g.By("Waiting for KubeletConfig to be created")
		var createdKC *mcfgv1.KubeletConfig
		o.Eventually(func() error {
			createdKC, err = mcClient.MachineconfigurationV1().KubeletConfigs().Get(ctx, kubeletConfigName, metav1.GetOptions{})
			return err
		}, 30*time.Second, 5*time.Second).Should(o.Succeed(), "KubeletConfig should be created")

		o.Expect(createdKC.Spec.AutoSizingReserved).NotTo(o.BeNil(), "AutoSizingReserved should not be nil")
		o.Expect(*createdKC.Spec.AutoSizingReserved).To(o.BeFalse(), "AutoSizingReserved should be false")

		g.By(fmt.Sprintf("Waiting for %s MCP to start updating", testMCPName))
		o.Eventually(func() bool {
			mcp, err := mcClient.MachineconfigurationV1().MachineConfigPools().Get(ctx, testMCPName, metav1.GetOptions{})
			if err != nil {
				framework.Logf("Error getting %s MCP: %v", testMCPName, err)
				return false
			}
			// Check if MCP is updating (has conditions indicating update in progress)
			for _, condition := range mcp.Status.Conditions {
				if condition.Type == "Updating" && condition.Status == corev1.ConditionTrue {
					return true
				}
			}
			return false
		}, 2*time.Minute, 10*time.Second).Should(o.BeTrue(), fmt.Sprintf("%s MCP should start updating", testMCPName))

		g.By(fmt.Sprintf("Waiting for %s MCP to be ready with new configuration", testMCPName))
		err = waitForMCPToBeReady(ctx, mcClient, testMCPName, 15*time.Minute)
		o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("%s MCP should become ready with new configuration", testMCPName))

		verifyNodeSizingEnabledFile(oc, nodeName, "false")

		// Explicit cleanup on success; DeferCleanup ensures cleanup also runs on failure
		// NOTE(review): DeferCleanup will invoke these again after the explicit
		// calls below, so on success each cleanup runs twice. The second pass
		// appears to be a logged no-op (NotFound on delete, label already gone),
		// but confirm that the repeated MCP-ready waits are acceptable.
		cleanupKubeletConfig()
		cleanupNodeLabel()
		cleanupMCP()
	})
})
234+
235+
// verifyNodeSizingEnabledFile verifies the NODE_SIZING_ENABLED value in the env file
236+
func verifyNodeSizingEnabledFile(oc *exutil.CLI, nodeName, expectedValue string) {
237+
g.By("Verifying /etc/node-sizing-enabled.env file exists")
238+
239+
output, err := ExecOnNodeWithChroot(oc, nodeName, "test", "-f", "/etc/node-sizing-enabled.env")
240+
o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("File /etc/node-sizing-enabled.env should exist on node %s. Output: %s", nodeName, output))
241+
242+
g.By("Reading /etc/node-sizing-enabled.env file contents")
243+
output, err = ExecOnNodeWithChroot(oc, nodeName, "cat", "/etc/node-sizing-enabled.env")
244+
o.Expect(err).NotTo(o.HaveOccurred(), "Should be able to read /etc/node-sizing-enabled.env")
245+
246+
framework.Logf("Contents of /etc/node-sizing-enabled.env:\n%s", output)
247+
248+
g.By(fmt.Sprintf("Verifying NODE_SIZING_ENABLED=%s is set in the file", expectedValue))
249+
o.Expect(strings.TrimSpace(output)).To(o.ContainSubstring(fmt.Sprintf("NODE_SIZING_ENABLED=%s", expectedValue)),
250+
fmt.Sprintf("File should contain NODE_SIZING_ENABLED=%s", expectedValue))
251+
252+
framework.Logf("Successfully verified NODE_SIZING_ENABLED=%s on node %s", expectedValue, nodeName)
253+
}

test/extended/node/node_utils.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
package node
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"time"
7+
8+
corev1 "k8s.io/api/core/v1"
9+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10+
"k8s.io/apimachinery/pkg/util/wait"
11+
"k8s.io/kubernetes/test/e2e/framework"
12+
13+
machineconfigclient "github.com/openshift/client-go/machineconfiguration/clientset/versioned"
14+
exutil "github.com/openshift/origin/test/extended/util"
15+
)
16+
17+
// ExecOnNodeWithChroot runs a command on a node using oc debug with chroot /host.
18+
func ExecOnNodeWithChroot(oc *exutil.CLI, nodeName string, cmd ...string) (string, error) {
19+
args := append([]string{"node/" + nodeName, "--", "chroot", "/host"}, cmd...)
20+
return oc.AsAdmin().Run("debug").Args(args...).Output()
21+
}
22+
23+
// waitForMCPToBeReady waits for a MachineConfigPool to be ready
24+
func waitForMCPToBeReady(ctx context.Context, mcClient *machineconfigclient.Clientset, poolName string, timeout time.Duration) error {
25+
return wait.PollImmediate(10*time.Second, timeout, func() (bool, error) {
26+
mcp, err := mcClient.MachineconfigurationV1().MachineConfigPools().Get(ctx, poolName, metav1.GetOptions{})
27+
if err != nil {
28+
return false, err
29+
}
30+
31+
// Check if all conditions are met for a ready state
32+
updating := false
33+
degraded := false
34+
ready := false
35+
36+
for _, condition := range mcp.Status.Conditions {
37+
switch condition.Type {
38+
case "Updating":
39+
if condition.Status == corev1.ConditionTrue {
40+
updating = true
41+
}
42+
case "Degraded":
43+
if condition.Status == corev1.ConditionTrue {
44+
degraded = true
45+
}
46+
case "Updated":
47+
if condition.Status == corev1.ConditionTrue {
48+
ready = true
49+
}
50+
}
51+
}
52+
53+
if degraded {
54+
return false, fmt.Errorf("MachineConfigPool %s is degraded", poolName)
55+
}
56+
57+
// Ready when not updating and updated condition is true
58+
isReady := !updating && ready && mcp.Status.ReadyMachineCount == mcp.Status.MachineCount
59+
60+
if isReady {
61+
framework.Logf("MachineConfigPool %s is ready: %d/%d machines ready",
62+
poolName, mcp.Status.ReadyMachineCount, mcp.Status.MachineCount)
63+
} else {
64+
framework.Logf("MachineConfigPool %s not ready yet: updating=%v, ready=%v, machines=%d/%d",
65+
poolName, updating, ready, mcp.Status.ReadyMachineCount, mcp.Status.MachineCount)
66+
}
67+
68+
return isReady, nil
69+
})
70+
}

0 commit comments

Comments
 (0)