Skip to content

Commit c0d9a3e

Browse files
authored
STAC-24393: Remove timeout for the restore operations (#21)
1 parent ae8a01d commit c0d9a3e

4 files changed

Lines changed: 33 additions & 50 deletions

File tree

cmd/clickhouse/check_and_finalize.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@ import (
1515
)
1616

1717
const (
18-
defaultRestoreTimeout = 30 * time.Minute
19-
defaultPollInterval = 10 * time.Second
18+
defaultPollInterval = 10 * time.Second
2019
)
2120

2221
// Check-and-finalize command flags
@@ -117,7 +116,7 @@ func waitAndFinalize(appCtx *app.Context, chClient clickhouse.Interface, operati
117116
}
118117
}
119118

120-
if err := restore.WaitForAPIRestore(checkStatusFn, defaultPollInterval, defaultRestoreTimeout, appCtx.Logger); err != nil {
119+
if err := restore.WaitForAPIRestore(checkStatusFn, defaultPollInterval, appCtx.Logger); err != nil {
121120
return err
122121
}
123122

cmd/elasticsearch/check_and_finalize.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ func waitAndFinalize(appCtx *app.Context, repository, snapshotName string) error
105105
return appCtx.ESClient.GetRestoreStatus(repository, snapshotName)
106106
}
107107

108-
if err := restore.WaitForAPIRestore(checkStatusFn, 0, 0, appCtx.Logger); err != nil {
108+
if err := restore.WaitForAPIRestore(checkStatusFn, 0, appCtx.Logger); err != nil {
109109
return err
110110
}
111111

internal/orchestration/restore/apirestore.go

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ import (
88
)
99

1010
const (
11-
defaultAPIRestoreTimeout = 30 * time.Minute
1211
defaultAPIStatusCheckInterval = 10 * time.Second
1312
)
1413

@@ -18,39 +17,30 @@ const (
1817
func WaitForAPIRestore(
1918
checkStatusFn func() (string, bool, error),
2019
interval time.Duration,
21-
timeout time.Duration,
2220
log *logger.Logger,
2321
) error {
2422
if interval == 0 {
2523
interval = defaultAPIStatusCheckInterval
2624
}
27-
if timeout == 0 {
28-
timeout = defaultAPIRestoreTimeout
29-
}
3025

31-
timeoutChan := time.After(timeout)
3226
ticker := time.NewTicker(interval)
3327
defer ticker.Stop()
3428

3529
for {
36-
select {
37-
case <-timeoutChan:
38-
return fmt.Errorf("timeout waiting for restore to complete")
39-
case <-ticker.C:
40-
statusMsg, isComplete, err := checkStatusFn()
41-
if err != nil {
42-
return fmt.Errorf("failed to check restore status: %w", err)
43-
}
30+
<-ticker.C
31+
statusMsg, isComplete, err := checkStatusFn()
32+
if err != nil {
33+
return fmt.Errorf("failed to check restore status: %w", err)
34+
}
4435

45-
log.Debugf("Restore status: %s (complete: %v)", statusMsg, isComplete)
36+
log.Debugf("Restore status: %s (complete: %v)", statusMsg, isComplete)
4637

47-
if isComplete {
48-
if statusMsg == "SUCCESS" || statusMsg == "PARTIAL" {
49-
log.Debugf("Restore completed successfully")
50-
return nil
51-
}
52-
return fmt.Errorf("restore failed with status: %s", statusMsg)
38+
if isComplete {
39+
if statusMsg == "SUCCESS" || statusMsg == "PARTIAL" {
40+
log.Debugf("Restore completed successfully")
41+
return nil
5342
}
43+
return fmt.Errorf("restore failed with status: %s", statusMsg)
5444
}
5545
}
5646
}

internal/orchestration/restore/job.go

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -9,44 +9,38 @@ import (
99
)
1010

1111
const (
12-
defaultJobCompletionTimeout = 30 * time.Minute
1312
defaultJobStatusCheckInterval = 10 * time.Second
1413
)
1514

1615
// WaitForJobCompletion waits for a Kubernetes job to complete
1716
func WaitForJobCompletion(k8sClient *k8s.Client, namespace, jobName string, log *logger.Logger) error {
18-
timeout := time.After(defaultJobCompletionTimeout)
1917
ticker := time.NewTicker(defaultJobStatusCheckInterval)
2018
defer ticker.Stop()
2119

2220
for {
23-
select {
24-
case <-timeout:
25-
return fmt.Errorf("timeout waiting for job to complete")
26-
case <-ticker.C:
27-
job, err := k8sClient.GetJob(namespace, jobName)
28-
if err != nil {
29-
return fmt.Errorf("failed to get job status: %w", err)
30-
}
21+
<-ticker.C
22+
job, err := k8sClient.GetJob(namespace, jobName)
23+
if err != nil {
24+
return fmt.Errorf("failed to get job status: %w", err)
25+
}
3126

32-
if job.Status.Succeeded > 0 {
33-
return nil
34-
}
27+
if job.Status.Succeeded > 0 {
28+
return nil
29+
}
3530

36-
if job.Status.Failed > 0 {
37-
// Get and print logs from failed job
38-
log.Println()
39-
log.Errorf("Job failed. Fetching logs...")
40-
log.Println()
41-
if err := PrintJobLogs(k8sClient, namespace, jobName, log); err != nil {
42-
log.Warningf("Failed to fetch job logs: %v", err)
43-
}
44-
return fmt.Errorf("job failed")
31+
if job.Status.Failed > 0 {
32+
// Get and print logs from failed job
33+
log.Println()
34+
log.Errorf("Job failed. Fetching logs...")
35+
log.Println()
36+
if err := PrintJobLogs(k8sClient, namespace, jobName, log); err != nil {
37+
log.Warningf("Failed to fetch job logs: %v", err)
4538
}
46-
47-
log.Debugf("Job status: Active=%d, Succeeded=%d, Failed=%d",
48-
job.Status.Active, job.Status.Succeeded, job.Status.Failed)
39+
return fmt.Errorf("job failed")
4940
}
41+
42+
log.Debugf("Job status: Active=%d, Succeeded=%d, Failed=%d",
43+
job.Status.Active, job.Status.Succeeded, job.Status.Failed)
5044
}
5145
}
5246

0 commit comments

Comments
 (0)