Skip to content

Commit c4ae5b3

Browse files
committed
feat: allow hook exit codes to control backup execution (e.g fail, skip, etc)
1 parent e96f403 commit c4ae5b3

23 files changed

+723
-286
lines changed

gen/go/types/value.pb.go

+58-7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gen/go/v1/config.pb.go

+353-210
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/hook/errors.go

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package hook
2+
3+
import "fmt"
4+
5+
// HookErrorRequestCancel requests that the calling operation cancel itself.
// It must be handled explicitly by the caller. Subsequent hooks will be skipped.
type HookErrorRequestCancel struct {
	// Err is the underlying error that triggered the cancel request.
	Err error
}

// Error implements the error interface, prefixing the wrapped error's message
// with "cancel: ". A nil Err is rendered as "<nil>" rather than panicking.
func (e HookErrorRequestCancel) Error() string {
	// %v calls Err.Error() for non-nil errors and is safe when Err is nil.
	return fmt.Sprintf("cancel: %v", e.Err)
}

// Unwrap returns the wrapped error so errors.Is and errors.As can inspect it.
func (e HookErrorRequestCancel) Unwrap() error {
	return e.Err
}
17+
18+
// HookErrorFatal stops evaluation of subsequent hooks and will propagate to
// the hook flow's caller.
type HookErrorFatal struct {
	// Err is the underlying error that is being treated as fatal.
	Err error
}

// Error implements the error interface, prefixing the wrapped error's message
// with "fatal: ". A nil Err is rendered as "<nil>" rather than panicking.
func (e HookErrorFatal) Error() string {
	// %v calls Err.Error() for non-nil errors and is safe when Err is nil.
	return fmt.Sprintf("fatal: %v", e.Err)
}

// Unwrap returns the wrapped error so errors.Is and errors.As can inspect it.
func (e HookErrorFatal) Unwrap() error {
	return e.Err
}

internal/hook/hook.go

+59-18
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package hook
22

33
import (
44
"bytes"
5+
"errors"
56
"fmt"
67
"io"
78
"slices"
@@ -34,15 +35,13 @@ func NewHookExecutor(oplog *oplog.OpLog, bigOutputStore *rotatinglog.RotatingLog
3435

3536
// ExecuteHooks runs the hooks subscribed to any of the given events, passing vars to each hook. It returns an error as soon as a hook fails with a halting (fatal or cancel) error.
3637
// Hooks are pulled both from the provided plan and from the repo config.
37-
func (e *HookExecutor) ExecuteHooks(repo *v1.Repo, plan *v1.Plan, snapshotId string, events []v1.Hook_Condition, vars HookVars) {
38+
func (e *HookExecutor) ExecuteHooks(repo *v1.Repo, plan *v1.Plan, events []v1.Hook_Condition, vars HookVars) error {
3839
operationBase := v1.Operation{
39-
Status: v1.OperationStatus_STATUS_INPROGRESS,
40-
PlanId: plan.GetId(),
41-
RepoId: repo.GetId(),
42-
SnapshotId: snapshotId,
40+
Status: v1.OperationStatus_STATUS_INPROGRESS,
41+
PlanId: plan.GetId(),
42+
RepoId: repo.GetId(),
4343
}
4444

45-
vars.SnapshotId = snapshotId
4645
vars.Repo = repo
4746
vars.Plan = plan
4847
vars.CurTime = time.Now()
@@ -56,14 +55,20 @@ func (e *HookExecutor) ExecuteHooks(repo *v1.Repo, plan *v1.Plan, snapshotId str
5655

5756
name := fmt.Sprintf("repo/%v/hook/%v", repo.Id, idx)
5857
operation := proto.Clone(&operationBase).(*v1.Operation)
58+
operation.DisplayMessage = "running " + name
5959
operation.UnixTimeStartMs = curTimeMs()
6060
operation.Op = &v1.Operation_OperationRunHook{
6161
OperationRunHook: &v1.OperationRunHook{
6262
Name: name,
6363
},
6464
}
6565
zap.L().Info("running hook", zap.String("plan", plan.Id), zap.Int64("opId", operation.Id), zap.String("hook", name))
66-
e.executeHook(operation, h, event, vars)
66+
if err := e.executeHook(operation, h, event, vars); err != nil {
67+
zap.S().Errorf("error on repo hook %v on condition %v: %v", idx, event.String(), err)
68+
if isHaltingError(err) {
69+
return fmt.Errorf("repo hook %v on condition %v: %w", idx, event.String(), err)
70+
}
71+
}
6772
}
6873

6974
for idx, hook := range plan.GetHooks() {
@@ -75,15 +80,23 @@ func (e *HookExecutor) ExecuteHooks(repo *v1.Repo, plan *v1.Plan, snapshotId str
7580

7681
name := fmt.Sprintf("plan/%v/hook/%v", plan.Id, idx)
7782
operation := proto.Clone(&operationBase).(*v1.Operation)
83+
operation.DisplayMessage = "running " + name
7884
operation.UnixTimeStartMs = curTimeMs()
7985
operation.Op = &v1.Operation_OperationRunHook{
8086
OperationRunHook: &v1.OperationRunHook{
8187
Name: name,
8288
},
8389
}
90+
8491
zap.L().Info("running hook", zap.String("plan", plan.Id), zap.Int64("opId", operation.Id), zap.String("hook", name))
85-
e.executeHook(operation, h, event, vars)
92+
if err := e.executeHook(operation, h, event, vars); err != nil {
93+
zap.S().Errorf("error on plan hook %v on condition %v: %v", idx, event.String(), err)
94+
if isHaltingError(err) {
95+
return fmt.Errorf("plan hook %v on condition %v: %w", idx, event.String(), err)
96+
}
97+
}
8698
}
99+
return nil
87100
}
88101

89102
func firstMatchingCondition(hook *Hook, events []v1.Hook_Condition) v1.Hook_Condition {
@@ -95,35 +108,43 @@ func firstMatchingCondition(hook *Hook, events []v1.Hook_Condition) v1.Hook_Cond
95108
return v1.Hook_CONDITION_UNKNOWN
96109
}
97110

98-
func (e *HookExecutor) executeHook(op *v1.Operation, hook *Hook, event v1.Hook_Condition, vars HookVars) {
111+
func (e *HookExecutor) executeHook(op *v1.Operation, hook *Hook, event v1.Hook_Condition, vars HookVars) error {
99112
if err := e.oplog.Add(op); err != nil {
100113
zap.S().Errorf("execute hook: add operation: %v", err)
101-
return
114+
return errors.New("couldn't create operation")
102115
}
103116

104117
output := &bytes.Buffer{}
118+
fmt.Fprintf(output, "triggering condition: %v\n", event.String())
105119

120+
var retErr error
106121
if err := hook.Do(event, vars, io.MultiWriter(output)); err != nil {
107122
output.Write([]byte(fmt.Sprintf("Error: %v", err)))
108-
op.DisplayMessage = err.Error()
109-
op.Status = v1.OperationStatus_STATUS_ERROR
110-
zap.S().Errorf("execute hook: %v", err)
123+
err = applyHookErrorPolicy(hook.OnError, err)
124+
var cancelErr *HookErrorRequestCancel
125+
if errors.As(err, &cancelErr) {
126+
// if it was a cancel then it successfully indicated it's intent to the caller
127+
// no error should be displayed in the UI.
128+
op.Status = v1.OperationStatus_STATUS_SUCCESS
129+
} else {
130+
op.Status = v1.OperationStatus_STATUS_ERROR
131+
}
132+
retErr = err
111133
} else {
112134
op.Status = v1.OperationStatus_STATUS_SUCCESS
113135
}
114136

115137
outputRef, err := e.logStore.Write(output.Bytes())
116138
if err != nil {
117-
zap.S().Errorf("execute hook: write log: %v", err)
118-
return
139+
retErr = errors.Join(retErr, fmt.Errorf("write logstore: %w", err))
119140
}
120-
op.Op.(*v1.Operation_OperationRunHook).OperationRunHook.OutputLogref = outputRef
141+
op.Logref = outputRef
121142

122143
op.UnixTimeEndMs = curTimeMs()
123144
if err := e.oplog.Update(op); err != nil {
124-
zap.S().Errorf("execute hook: update operation: %v", err)
125-
return
145+
retErr = errors.Join(retErr, fmt.Errorf("update oplog: %w", err))
126146
}
147+
return retErr
127148
}
128149

129150
func curTimeMs() int64 {
@@ -175,3 +196,23 @@ func (h *Hook) renderTemplateOrDefault(template string, defaultTmpl string, vars
175196
}
176197
return h.renderTemplate(template, vars)
177198
}
199+
200+
func applyHookErrorPolicy(onError v1.Hook_OnError, err error) error {
201+
if err == nil || errors.As(err, &HookErrorFatal{}) || errors.As(err, &HookErrorRequestCancel{}) {
202+
return err
203+
}
204+
205+
if onError == v1.Hook_ON_ERROR_CANCEL {
206+
return &HookErrorRequestCancel{Err: err}
207+
} else if onError == v1.Hook_ON_ERROR_FATAL {
208+
return &HookErrorFatal{Err: err}
209+
}
210+
return err
211+
}
212+
213+
// isHaltingError returns true if the error is a fatal error or a request to cancel the operation
214+
func isHaltingError(err error) bool {
215+
var fatalErr *HookErrorFatal
216+
var cancelErr *HookErrorRequestCancel
217+
return errors.As(err, &fatalErr) || errors.As(err, &cancelErr)
218+
}

internal/hook/hook_test.go

+28
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package hook
22

33
import (
44
"bytes"
5+
"errors"
56
"os/exec"
67
"runtime"
78
"testing"
@@ -56,3 +57,30 @@ exit $counter`,
5657
t.Fatalf("expected exit code 3, got %v", err.(*exec.ExitError).ExitCode())
5758
}
5859
}
60+
61+
func TestCommandHookErrorHandling(t *testing.T) {
62+
if runtime.GOOS == "windows" {
63+
t.Skip("skipping test on windows")
64+
}
65+
66+
hook := Hook(v1.Hook{
67+
Conditions: []v1.Hook_Condition{
68+
v1.Hook_CONDITION_SNAPSHOT_START,
69+
},
70+
Action: &v1.Hook_ActionCommand{
71+
ActionCommand: &v1.Hook_Command{
72+
Command: "exit 1",
73+
},
74+
},
75+
OnError: v1.Hook_ON_ERROR_CANCEL,
76+
})
77+
78+
err := applyHookErrorPolicy(hook.OnError, hook.Do(v1.Hook_CONDITION_SNAPSHOT_START, HookVars{}, &bytes.Buffer{}))
79+
if err == nil {
80+
t.Fatal("expected error")
81+
}
82+
var cancelErr *HookErrorRequestCancel
83+
if !errors.As(err, &cancelErr) {
84+
t.Fatalf("expected HookErrorRequestCancel, got %v", err)
85+
}
86+
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

internal/orchestrator/taskbackup.go

+32-7
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,30 @@ func backupHelper(ctx context.Context, t Task, orchestrator *Orchestrator, plan
108108
return fmt.Errorf("couldn't get repo %q: %w", plan.Repo, err)
109109
}
110110

111-
orchestrator.hookExecutor.ExecuteHooks(repo.Config(), plan, "", []v1.Hook_Condition{
111+
// Run start hooks e.g. preflight checks and backup start notifications.
112+
if err := orchestrator.hookExecutor.ExecuteHooks(repo.Config(), plan, []v1.Hook_Condition{
112113
v1.Hook_CONDITION_SNAPSHOT_START,
113114
}, hook.HookVars{
114115
Task: t.Name(),
115-
})
116+
}); err != nil {
117+
var cancelErr *hook.HookErrorRequestCancel
118+
if errors.As(err, &cancelErr) {
119+
op.Status = v1.OperationStatus_STATUS_USER_CANCELLED // user visible cancelled status
120+
op.DisplayMessage = err.Error()
121+
return nil
122+
}
123+
124+
// If the snapshot start hook fails we trigger error notification hooks.
125+
retErr := fmt.Errorf("hook failed: %w", err)
126+
_ = orchestrator.hookExecutor.ExecuteHooks(repo.Config(), plan, []v1.Hook_Condition{
127+
v1.Hook_CONDITION_ANY_ERROR,
128+
}, hook.HookVars{
129+
Task: t.Name(),
130+
Error: retErr.Error(),
131+
})
132+
133+
return retErr
134+
}
116135

117136
var sendWg sync.WaitGroup
118137
lastSent := time.Now() // debounce progress updates, these can endup being very frequent.
@@ -160,24 +179,30 @@ func backupHelper(ctx context.Context, t Task, orchestrator *Orchestrator, plan
160179
}()
161180
})
162181

182+
sendWg.Wait()
183+
184+
if summary == nil {
185+
summary = &restic.BackupProgressEntry{}
186+
}
187+
163188
vars := hook.HookVars{
164189
Task: t.Name(),
165190
SnapshotStats: summary,
191+
SnapshotId: summary.SnapshotId,
166192
}
167193
if err != nil {
168194
vars.Error = err.Error()
169-
orchestrator.hookExecutor.ExecuteHooks(repo.Config(), plan, "", []v1.Hook_Condition{
170-
v1.Hook_CONDITION_SNAPSHOT_ERROR, v1.Hook_CONDITION_ANY_ERROR,
171-
}, vars)
172-
173195
if !errors.Is(err, restic.ErrPartialBackup) {
196+
_ = orchestrator.hookExecutor.ExecuteHooks(repo.Config(), plan, []v1.Hook_Condition{
197+
v1.Hook_CONDITION_SNAPSHOT_ERROR, v1.Hook_CONDITION_ANY_ERROR,
198+
}, vars)
174199
return fmt.Errorf("repo.Backup for repo %q: %w", plan.Repo, err)
175200
}
176201
op.Status = v1.OperationStatus_STATUS_WARNING
177202
op.DisplayMessage = "Partial backup, some files may not have been read completely."
178203
}
179204

180-
orchestrator.hookExecutor.ExecuteHooks(repo.Config(), plan, summary.SnapshotId, []v1.Hook_Condition{
205+
orchestrator.hookExecutor.ExecuteHooks(repo.Config(), plan, []v1.Hook_Condition{
181206
v1.Hook_CONDITION_SNAPSHOT_END,
182207
}, vars)
183208

0 commit comments

Comments
 (0)