Skip to content

Commit 8f0d3d0

Browse files
mkiewegblakepetterssonsvghadi
authored
feat: Timestamp for Health Status (#16972) (#18660)
* add lastTransitionTime to health status Signed-off-by: Manuel Kieweg <[email protected]> * address first feedback Signed-off-by: Manuel Kieweg <[email protected]> * set transition time if health status is unknown Signed-off-by: Manuel Kieweg <[email protected]> * extend health improvement tests Signed-off-by: Manuel Kieweg <[email protected]> * add apoplication controller test Signed-off-by: Manuel Kieweg <[email protected]> * use require for NoError Signed-off-by: Manuel Kieweg <[email protected]> * more extensive tests for health state changes Signed-off-by: Manuel Kieweg <[email protected]> * Apply suggestions from code review Co-authored-by: Blake Pettersson <[email protected]> Signed-off-by: Manuel Kieweg <[email protected]> * Code review suggestions Signed-off-by: Manuel Kieweg <[email protected]> * remove obsolete assert Signed-off-by: Manuel Kieweg <[email protected]> * Change LastTransitionTime field to pointer type Due to implementation limitations, setting LastTransitionTime at the resource level is challenging. Converting it to a pointer type allows it to be skipped at the resource level and prevents it from appearing in .status.resources of the Application CR. Additionally, it doesn’t provide much value or have a known use case right now. Signed-off-by: Siddhesh Ghadi <[email protected]> * Resolve rebase conflicts Signed-off-by: Siddhesh Ghadi <[email protected]> * Address review comment Signed-off-by: Siddhesh Ghadi <[email protected]> * Trigger CI Signed-off-by: Siddhesh Ghadi <[email protected]> --------- Signed-off-by: Manuel Kieweg <[email protected]> Signed-off-by: Manuel Kieweg <[email protected]> Signed-off-by: Siddhesh Ghadi <[email protected]> Co-authored-by: Blake Pettersson <[email protected]> Co-authored-by: Siddhesh Ghadi <[email protected]>
1 parent 4f6e408 commit 8f0d3d0

File tree

17 files changed

+1186
-726
lines changed

17 files changed

+1186
-726
lines changed

assets/swagger.json

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

controller/appcontroller.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1630,9 +1630,11 @@ func (ctrl *ApplicationController) processAppRefreshQueueItem() (processNext boo
16301630

16311631
project, hasErrors := ctrl.refreshAppConditions(app)
16321632
ts.AddCheckpoint("refresh_app_conditions_ms")
1633+
now := metav1.Now()
16331634
if hasErrors {
16341635
app.Status.Sync.Status = appv1.SyncStatusCodeUnknown
16351636
app.Status.Health.Status = health.HealthStatusUnknown
1637+
app.Status.Health.LastTransitionTime = &now
16361638
patchMs = ctrl.persistAppStatus(origApp, &app.Status)
16371639

16381640
if err := ctrl.cache.SetAppResourcesTree(app.InstanceName(ctrl.namespace), &appv1.ApplicationTree{}); err != nil {
@@ -1676,7 +1678,6 @@ func (ctrl *ApplicationController) processAppRefreshQueueItem() (processNext boo
16761678
revisions = append(revisions, revision)
16771679
sources = append(sources, app.Spec.GetSource())
16781680
}
1679-
now := metav1.Now()
16801681

16811682
compareResult, err := ctrl.appStateManager.CompareAppState(app, project, revisions, sources,
16821683
refreshType == appv1.RefreshTypeHard,

controller/appcontroller_test.go

Lines changed: 226 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@ import (
99
"time"
1010

1111
clustercache "github.com/argoproj/gitops-engine/pkg/cache"
12+
"github.com/argoproj/gitops-engine/pkg/health"
1213
"github.com/argoproj/gitops-engine/pkg/utils/kube/kubetest"
1314
"github.com/sirupsen/logrus"
1415
"github.com/stretchr/testify/require"
1516
"k8s.io/apimachinery/pkg/api/resource"
17+
"k8s.io/apimachinery/pkg/labels"
1618
"k8s.io/apimachinery/pkg/util/wait"
1719
"k8s.io/client-go/rest"
1820
"k8s.io/utils/ptr"
@@ -26,6 +28,7 @@ import (
2628
"github.com/argoproj/gitops-engine/pkg/utils/kube"
2729
"github.com/stretchr/testify/assert"
2830
"github.com/stretchr/testify/mock"
31+
v1 "k8s.io/api/apps/v1"
2932
corev1 "k8s.io/api/core/v1"
3033
apierr "k8s.io/apimachinery/pkg/api/errors"
3134
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -90,6 +93,10 @@ func (m *MockKubectl) DeleteResource(ctx context.Context, config *rest.Config, g
9093
}
9194

9295
func newFakeController(data *fakeData, repoErr error) *ApplicationController {
96+
return newFakeControllerWithResync(data, time.Minute, repoErr)
97+
}
98+
99+
func newFakeControllerWithResync(data *fakeData, appResyncPeriod time.Duration, repoErr error) *ApplicationController {
93100
var clust corev1.Secret
94101
err := yaml.Unmarshal([]byte(fakeCluster), &clust)
95102
if err != nil {
@@ -155,7 +162,7 @@ func newFakeController(data *fakeData, repoErr error) *ApplicationController {
155162
1*time.Minute,
156163
),
157164
kubectl,
158-
time.Minute,
165+
appResyncPeriod,
159166
time.Hour,
160167
time.Second,
161168
time.Minute,
@@ -492,10 +499,23 @@ func newFakeApp() *v1alpha1.Application {
492499
return createFakeApp(fakeApp)
493500
}
494501

502+
func newFakeAppWithHealthAndTime(status health.HealthStatusCode, timestamp metav1.Time) *v1alpha1.Application {
503+
return createFakeAppWithHealthAndTime(fakeApp, status, timestamp)
504+
}
505+
495506
func newFakeMultiSourceApp() *v1alpha1.Application {
496507
return createFakeApp(fakeMultiSourceApp)
497508
}
498509

510+
func createFakeAppWithHealthAndTime(testApp string, status health.HealthStatusCode, timestamp metav1.Time) *v1alpha1.Application {
511+
app := createFakeApp(testApp)
512+
app.Status.Health = v1alpha1.HealthStatus{
513+
Status: status,
514+
LastTransitionTime: &timestamp,
515+
}
516+
return app
517+
}
518+
499519
func newFakeAppWithDestMismatch() *v1alpha1.Application {
500520
return createFakeApp(fakeAppWithDestMismatch)
501521
}
@@ -1669,6 +1689,211 @@ func TestUpdateReconciledAt(t *testing.T) {
16691689
})
16701690
}
16711691

1692+
func TestUpdateHealthStatusTransitionTime(t *testing.T) {
1693+
deployment := kube.MustToUnstructured(&v1.Deployment{
1694+
TypeMeta: metav1.TypeMeta{
1695+
APIVersion: "apps/v1",
1696+
Kind: "Deployment",
1697+
},
1698+
ObjectMeta: metav1.ObjectMeta{
1699+
Name: "demo",
1700+
Namespace: "default",
1701+
},
1702+
})
1703+
testCases := []struct {
1704+
name string
1705+
app *v1alpha1.Application
1706+
configMapData map[string]string
1707+
expectedStatus health.HealthStatusCode
1708+
}{
1709+
{
1710+
name: "Degraded to Missing",
1711+
app: newFakeAppWithHealthAndTime(health.HealthStatusDegraded, testTimestamp),
1712+
configMapData: map[string]string{
1713+
"resource.customizations": `
1714+
apps/Deployment:
1715+
health.lua: |
1716+
hs = {}
1717+
hs.status = "Missing"
1718+
hs.message = ""
1719+
return hs`,
1720+
},
1721+
expectedStatus: health.HealthStatusMissing,
1722+
},
1723+
{
1724+
name: "Missing to Progressing",
1725+
app: newFakeAppWithHealthAndTime(health.HealthStatusMissing, testTimestamp),
1726+
configMapData: map[string]string{
1727+
"resource.customizations": `
1728+
apps/Deployment:
1729+
health.lua: |
1730+
hs = {}
1731+
hs.status = "Progressing"
1732+
hs.message = ""
1733+
return hs`,
1734+
},
1735+
expectedStatus: health.HealthStatusProgressing,
1736+
},
1737+
{
1738+
name: "Progressing to Healthy",
1739+
app: newFakeAppWithHealthAndTime(health.HealthStatusProgressing, testTimestamp),
1740+
configMapData: map[string]string{
1741+
"resource.customizations": `
1742+
apps/Deployment:
1743+
health.lua: |
1744+
hs = {}
1745+
hs.status = "Healthy"
1746+
hs.message = ""
1747+
return hs`,
1748+
},
1749+
expectedStatus: health.HealthStatusHealthy,
1750+
},
1751+
{
1752+
name: "Healthy to Degraded",
1753+
app: newFakeAppWithHealthAndTime(health.HealthStatusHealthy, testTimestamp),
1754+
configMapData: map[string]string{
1755+
"resource.customizations": `
1756+
apps/Deployment:
1757+
health.lua: |
1758+
hs = {}
1759+
hs.status = "Degraded"
1760+
hs.message = ""
1761+
return hs`,
1762+
},
1763+
expectedStatus: health.HealthStatusDegraded,
1764+
},
1765+
}
1766+
1767+
for _, tc := range testCases {
1768+
t.Run(tc.name, func(t *testing.T) {
1769+
ctrl := newFakeController(&fakeData{
1770+
apps: []runtime.Object{tc.app, &defaultProj},
1771+
manifestResponse: &apiclient.ManifestResponse{
1772+
Manifests: []string{},
1773+
Namespace: test.FakeDestNamespace,
1774+
Server: test.FakeClusterURL,
1775+
Revision: "abc123",
1776+
},
1777+
managedLiveObjs: map[kube.ResourceKey]*unstructured.Unstructured{
1778+
kube.GetResourceKey(deployment): deployment,
1779+
},
1780+
configMapData: tc.configMapData,
1781+
}, nil)
1782+
1783+
ctrl.processAppRefreshQueueItem()
1784+
apps, err := ctrl.appLister.List(labels.Everything())
1785+
require.NoError(t, err)
1786+
assert.NotEmpty(t, apps)
1787+
assert.Equal(t, tc.expectedStatus, apps[0].Status.Health.Status)
1788+
assert.NotEqual(t, testTimestamp, *apps[0].Status.Health.LastTransitionTime)
1789+
})
1790+
}
1791+
}
1792+
1793+
func TestUpdateHealthStatusProgression(t *testing.T) {
1794+
app := newFakeAppWithHealthAndTime(health.HealthStatusDegraded, testTimestamp)
1795+
deployment := kube.MustToUnstructured(&v1.Deployment{
1796+
TypeMeta: metav1.TypeMeta{
1797+
APIVersion: "apps/v1",
1798+
Kind: "Deployment",
1799+
},
1800+
ObjectMeta: metav1.ObjectMeta{
1801+
Name: "demo",
1802+
Namespace: "default",
1803+
},
1804+
Status: v1.DeploymentStatus{
1805+
ObservedGeneration: 0,
1806+
},
1807+
})
1808+
configMapData := map[string]string{
1809+
"resource.customizations": `
1810+
apps/Deployment:
1811+
health.lua: |
1812+
hs = {}
1813+
hs.status = ""
1814+
hs.message = ""
1815+
1816+
if obj.metadata ~= nil then
1817+
if obj.metadata.labels ~= nil then
1818+
current_status = obj.metadata.labels["status"]
1819+
if current_status == "Degraded" then
1820+
hs.status = "Missing"
1821+
elseif current_status == "Missing" then
1822+
hs.status = "Progressing"
1823+
elseif current_status == "Progressing" then
1824+
hs.status = "Healthy"
1825+
elseif current_status == "Healthy" then
1826+
hs.status = "Degraded"
1827+
end
1828+
end
1829+
end
1830+
1831+
return hs`,
1832+
}
1833+
ctrl := newFakeControllerWithResync(&fakeData{
1834+
apps: []runtime.Object{app, &defaultProj},
1835+
manifestResponse: &apiclient.ManifestResponse{
1836+
Manifests: []string{},
1837+
Namespace: test.FakeDestNamespace,
1838+
Server: test.FakeClusterURL,
1839+
Revision: "abc123",
1840+
},
1841+
managedLiveObjs: map[kube.ResourceKey]*unstructured.Unstructured{
1842+
kube.GetResourceKey(deployment): deployment,
1843+
},
1844+
configMapData: configMapData,
1845+
manifestResponses: []*apiclient.ManifestResponse{
1846+
{},
1847+
{},
1848+
{},
1849+
{},
1850+
},
1851+
}, time.Millisecond*10, nil)
1852+
1853+
testCases := []struct {
1854+
name string
1855+
initialStatus string
1856+
expectedStatus health.HealthStatusCode
1857+
}{
1858+
{
1859+
name: "Degraded to Missing",
1860+
initialStatus: "Degraded",
1861+
expectedStatus: health.HealthStatusMissing,
1862+
},
1863+
{
1864+
name: "Missing to Progressing",
1865+
initialStatus: "Missing",
1866+
expectedStatus: health.HealthStatusProgressing,
1867+
},
1868+
{
1869+
name: "Progressing to Healthy",
1870+
initialStatus: "Progressing",
1871+
expectedStatus: health.HealthStatusHealthy,
1872+
},
1873+
{
1874+
name: "Healthy to Degraded",
1875+
initialStatus: "Healthy",
1876+
expectedStatus: health.HealthStatusDegraded,
1877+
},
1878+
}
1879+
1880+
for _, tc := range testCases {
1881+
t.Run(tc.name, func(t *testing.T) {
1882+
deployment.SetLabels(map[string]string{"status": tc.initialStatus})
1883+
ctrl.processAppRefreshQueueItem()
1884+
apps, err := ctrl.appLister.List(labels.Everything())
1885+
require.NoError(t, err)
1886+
if assert.NotEmpty(t, apps) {
1887+
assert.Equal(t, tc.expectedStatus, apps[0].Status.Health.Status)
1888+
assert.NotEqual(t, testTimestamp, *apps[0].Status.Health.LastTransitionTime)
1889+
}
1890+
1891+
ctrl.requestAppRefresh(app.Name, nil, nil)
1892+
time.Sleep(time.Millisecond * 15)
1893+
})
1894+
}
1895+
}
1896+
16721897
func TestProjectErrorToCondition(t *testing.T) {
16731898
app := newFakeApp()
16741899
app.Spec.Project = "wrong project"

controller/health.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"github.com/argoproj/gitops-engine/pkg/sync/ignore"
99
kubeutil "github.com/argoproj/gitops-engine/pkg/utils/kube"
1010
log "github.com/sirupsen/logrus"
11+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1112
"k8s.io/apimachinery/pkg/runtime/schema"
1213

1314
"github.com/argoproj/argo-cd/v2/common"
@@ -20,6 +21,7 @@ import (
2021
func setApplicationHealth(resources []managedResource, statuses []appv1.ResourceStatus, resourceOverrides map[string]appv1.ResourceOverride, app *appv1.Application, persistResourceHealth bool) (*appv1.HealthStatus, error) {
2122
var savedErr error
2223
var errCount uint
24+
2325
appHealth := appv1.HealthStatus{Status: health.HealthStatusHealthy}
2426
for i, res := range resources {
2527
if res.Target != nil && hookutil.Skip(res.Target) {
@@ -80,6 +82,13 @@ func setApplicationHealth(resources []managedResource, statuses []appv1.Resource
8082
}
8183
if persistResourceHealth {
8284
app.Status.ResourceHealthSource = appv1.ResourceHealthLocationInline
85+
// if the status didn't change, don't update the timestamp
86+
if app.Status.Health.Status == appHealth.Status && app.Status.Health.LastTransitionTime != nil {
87+
appHealth.LastTransitionTime = app.Status.Health.LastTransitionTime
88+
} else {
89+
now := metav1.Now()
90+
appHealth.LastTransitionTime = &now
91+
}
8392
} else {
8493
app.Status.ResourceHealthSource = appv1.ResourceHealthLocationAppTree
8594
}

0 commit comments

Comments
 (0)