Skip to content

Commit ff2eb27

Browse files
authored
[Bugfix] Propagate spec before pod restart (#782)
1 parent f77a5d6 commit ff2eb27

11 files changed

+105
-80
lines changed

pkg/deployment/reconcile/action.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,38 @@ type Action interface {
5353
MemberID() string
5454
}
5555

56+
// ActionReloadCachedStatus keeps information about CachedStatus reloading (executed after action has been executed)
57+
type ActionReloadCachedStatus interface {
58+
Action
59+
60+
// ReloadCachedStatus keeps information about CachedStatus reloading (executed after action has been executed)
61+
ReloadCachedStatus() bool
62+
}
63+
64+
func getActionReloadCachedStatus(a Action) bool {
65+
if c, ok := a.(ActionReloadCachedStatus); !ok {
66+
return false
67+
} else {
68+
return c.ReloadCachedStatus()
69+
}
70+
}
71+
72+
// ActionPlanAppender modify plan after action execution
73+
type ActionPlanAppender interface {
74+
Action
75+
76+
// ActionPlanAppender modify plan after action execution
77+
ActionPlanAppender(current api.Plan) api.Plan
78+
}
79+
80+
func getActionPlanAppender(a Action, plan api.Plan) api.Plan {
81+
if c, ok := a.(ActionPlanAppender); !ok {
82+
return plan
83+
} else {
84+
return c.ActionPlanAppender(plan)
85+
}
86+
}
87+
5688
type actionFactory func(log zerolog.Logger, action api.Action, actionCtx ActionContext) Action
5789

5890
var (

pkg/deployment/reconcile/action_add_member.go

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ func newAddMemberAction(log zerolog.Logger, action api.Action, actionCtx ActionC
5050
return a
5151
}
5252

53+
var _ ActionPlanAppender = &actionAddMember{}
54+
5355
// actionAddMember implements an AddMemberAction.
5456
type actionAddMember struct {
5557
// actionImpl implement timeout and member id functions
@@ -72,19 +74,19 @@ func (a *actionAddMember) Start(ctx context.Context) (bool, error) {
7274
}
7375
a.newMemberID = newID
7476

77+
return true, nil
78+
}
79+
80+
// ActionPlanAppender appends wait methods to the plan
81+
func (a *actionAddMember) ActionPlanAppender(current api.Plan) api.Plan {
82+
var app api.Plan
83+
7584
if _, ok := a.action.Params[api.ActionTypeWaitForMemberUp.String()]; ok {
76-
a.actionCtx.WithStatusUpdate(ctx, func(s *api.DeploymentStatus) bool {
77-
s.Plan = append(s.Plan, api.NewAction(api.ActionTypeWaitForMemberInSync, a.action.Group, newID, "Wait for member in sync after creation"))
78-
return true
79-
})
85+
app = append(app, api.NewAction(api.ActionTypeWaitForMemberUp, a.action.Group, a.newMemberID, "Wait for member in sync after creation"))
8086
}
8187

82-
if _, ok := a.action.Params[api.ActionTypeWaitForMemberInSync.String()]; ok {
83-
a.actionCtx.WithStatusUpdate(ctx, func(s *api.DeploymentStatus) bool {
84-
s.Plan = append(s.Plan, api.NewAction(api.ActionTypeWaitForMemberInSync, a.action.Group, newID, "Wait for member in sync after creation"))
85-
return true
86-
})
88+
if _, ok := a.action.Params[api.ActionTypeWaitForMemberUp.String()]; ok {
89+
app = append(app, api.NewAction(api.ActionTypeWaitForMemberInSync, a.action.Group, a.newMemberID, "Wait for member in sync after creation"))
8790
}
88-
89-
return true, nil
91+
return append(app, current...)
9092
}

pkg/deployment/reconcile/action_arango_membed_update_pod_spec.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ func newArangoMemberUpdatePodSpecAction(log zerolog.Logger, action api.Action, a
4949
return a
5050
}
5151

52+
var _ ActionReloadCachedStatus = &actionArangoMemberUpdatePodSpec{}
53+
5254
// actionArangoMemberUpdatePodSpec implements an ArangoMemberUpdatePodSpec.
5355
type actionArangoMemberUpdatePodSpec struct {
5456
// actionImpl implement timeout and member id functions
@@ -150,3 +152,7 @@ func (a *actionArangoMemberUpdatePodSpec) Start(ctx context.Context) (bool, erro
150152

151153
return true, nil
152154
}
155+
156+
func (a *actionArangoMemberUpdatePodSpec) ReloadCachedStatus() bool {
157+
return true
158+
}

pkg/deployment/reconcile/action_pvc_resize.go

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -96,20 +96,6 @@ func (a *actionPVCResize) Start(ctx context.Context) (bool, error) {
9696
return false, err
9797
}
9898

99-
return false, nil
100-
} else if cmp > 0 {
101-
if groupSpec.GetVolumeAllowShrink() && group == api.ServerGroupDBServers {
102-
if err := a.actionCtx.WithStatusUpdate(ctx, func(s *api.DeploymentStatus) bool {
103-
s.Plan = append(s.Plan, api.NewAction(api.ActionTypeMarkToRemoveMember, group, m.ID))
104-
return true
105-
}); err != nil {
106-
log.Error().Err(err).Msg("Unable to mark instance to be replaced")
107-
}
108-
} else {
109-
log.Error().Str("server-group", group.AsRole()).Str("pvc-storage-size", volumeSize.String()).Str("requested-size", requestedSize.String()).
110-
Msg("Volume size should not shrink")
111-
a.actionCtx.CreateEvent(k8sutil.NewCannotShrinkVolumeEvent(a.actionCtx.GetAPIObject(), pvc.Name))
112-
}
11399
return false, nil
114100
}
115101
}

pkg/deployment/reconcile/action_upgrade_member.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ func (a *actionUpgradeMember) Start(ctx context.Context) (bool, error) {
6262
}
6363
// Set AutoUpgrade condition
6464
m.Conditions.Update(api.ConditionTypeAutoUpgrade, true, "Upgrading", "AutoUpgrade on first restart")
65+
6566
if err := a.actionCtx.UpdateMember(ctx, m); err != nil {
6667
return false, errors.WithStack(err)
6768
}
@@ -134,6 +135,7 @@ func (a *actionUpgradeMember) CheckProgress(ctx context.Context) (bool, bool, er
134135
} else if !ok {
135136
return false, false, nil
136137
}
138+
137139
// Pod is now upgraded, update the member status
138140
m.Phase = api.MemberPhaseCreated
139141
m.RecentTerminations = nil // Since we're upgrading, we do not care about old terminations.

pkg/deployment/reconcile/plan_builder_high.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,9 @@ func updateMemberPhasePlan(ctx context.Context,
126126
api.NewAction(api.ActionTypeMemberRIDUpdate, group, m.ID, "Regenerate member RID"),
127127
}
128128

129-
p = append(p, api.NewAction(api.ActionTypeArangoMemberUpdatePodStatus, group, m.ID, "Propagating status of pod"))
129+
p = append(p,
130+
api.NewAction(api.ActionTypeArangoMemberUpdatePodSpec, group, m.ID, "Propagating spec of pod"),
131+
api.NewAction(api.ActionTypeArangoMemberUpdatePodStatus, group, m.ID, "Propagating status of pod"))
130132

131133
p = append(p, api.NewAction(api.ActionTypeMemberPhaseUpdate, group, m.ID,
132134
"Move to Pending phase").AddParam(ActionTypeMemberPhaseUpdatePhaseKey, api.MemberPhasePending.String()))

pkg/deployment/reconcile/plan_builder_normal.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,11 @@ func createRemoveCleanedDBServersPlan(ctx context.Context,
200200
spec api.DeploymentSpec, status api.DeploymentStatus,
201201
cachedStatus inspectorInterface.Inspector, context PlanBuilderContext) api.Plan {
202202
for _, m := range status.Members.DBServers {
203+
if !m.Phase.IsReady() {
204+
// Ensure that we CleanOut members which are Ready only to ensure data will be moved
205+
continue
206+
}
207+
203208
if m.Phase.IsCreatedOrDrain() && m.Conditions.IsTrue(api.ConditionTypeCleanedOut) {
204209
log.Debug().
205210
Str("id", m.ID).

pkg/deployment/reconcile/plan_builder_storage.go

Lines changed: 28 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,10 @@ func createRotateServerStoragePlan(ctx context.Context,
4545
return nil
4646
}
4747
var plan api.Plan
48+
var canContinue = true
4849
status.Members.ForeachServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error {
4950
for _, m := range members {
50-
if !plan.IsEmpty() {
51+
if !plan.IsEmpty() && !canContinue {
5152
// Only 1 change at a time
5253
continue
5354
}
@@ -73,21 +74,17 @@ func createRotateServerStoragePlan(ctx context.Context,
7374
}
7475

7576
if util.StringOrDefault(pvc.Spec.StorageClassName) != storageClassName && storageClassName != "" {
77+
// Do not append more than 1 operation if we replace storageClass
78+
canContinue = false
79+
7680
// Storageclass has changed
7781
log.Info().Str("pod-name", m.PodName).
7882
Str("pvc-storage-class", util.StringOrDefault(pvc.Spec.StorageClassName)).
7983
Str("group-storage-class", storageClassName).Msg("Storage class has changed - pod needs replacement")
8084

8185
if group == api.ServerGroupDBServers {
8286
plan = append(plan,
83-
api.NewAction(api.ActionTypeDisableClusterScaling, group, ""),
84-
api.NewAction(api.ActionTypeAddMember, group, ""),
85-
api.NewAction(api.ActionTypeWaitForMemberUp, group, api.MemberIDPreviousAction),
86-
api.NewAction(api.ActionTypeCleanOutMember, group, m.ID),
87-
api.NewAction(api.ActionTypeShutdownMember, group, m.ID),
88-
api.NewAction(api.ActionTypeRemoveMember, group, m.ID),
89-
api.NewAction(api.ActionTypeEnableClusterScaling, group, ""),
90-
)
87+
api.NewAction(api.ActionTypeMarkToRemoveMember, group, m.ID))
9188
} else if group == api.ServerGroupAgents {
9289
plan = append(plan,
9390
api.NewAction(api.ActionTypeShutdownMember, group, m.ID),
@@ -100,25 +97,30 @@ func createRotateServerStoragePlan(ctx context.Context,
10097
context.CreateEvent(k8sutil.NewCannotChangeStorageClassEvent(apiObject, m.ID, group.AsRole(), "Not supported"))
10198
}
10299
} else {
100+
var res core.ResourceList
103101
if groupSpec.HasVolumeClaimTemplate() {
104-
res := groupSpec.GetVolumeClaimTemplate().Spec.Resources.Requests
105-
// For pvc only resources.requests is mutable
106-
if comparePVCResourceList(pvc.Spec.Resources.Requests, res) {
107-
plan = append(plan, pvcResizePlan(log, group, groupSpec, m.ID)...)
108-
}
102+
res = groupSpec.GetVolumeClaimTemplate().Spec.Resources.Requests
109103
} else {
110-
if requestedSize, ok := groupSpec.Resources.Requests[core.ResourceStorage]; ok {
111-
if volumeSize, ok := pvc.Spec.Resources.Requests[core.ResourceStorage]; ok {
112-
cmp := volumeSize.Cmp(requestedSize)
113-
if cmp < 0 {
114-
plan = append(plan, pvcResizePlan(log, group, groupSpec, m.ID)...)
115-
} else if cmp > 0 {
116-
if groupSpec.GetVolumeAllowShrink() && group == api.ServerGroupDBServers {
117-
plan = append(plan, api.NewAction(api.ActionTypeMarkToRemoveMember, group, m.ID))
118-
} else {
119-
log.Error().Str("server-group", group.AsRole()).Str("pvc-storage-size", volumeSize.String()).Str("requested-size", requestedSize.String()).
120-
Msg("Volume size should not shrink")
121-
}
104+
res = groupSpec.Resources.Requests
105+
}
106+
if requestedSize, ok := res[core.ResourceStorage]; ok {
107+
if volumeSize, ok := pvc.Spec.Resources.Requests[core.ResourceStorage]; ok {
108+
cmp := volumeSize.Cmp(requestedSize)
109+
if cmp < 0 {
110+
if groupSpec.VolumeResizeMode.Get() == api.PVCResizeModeRotate {
111+
// Do not append more than 1 operation if we hard restart member
112+
canContinue = false
113+
}
114+
plan = append(plan, pvcResizePlan(log, group, groupSpec, m.ID)...)
115+
} else if cmp > 0 {
116+
// Do not append more than 1 operation if we schrink volume
117+
canContinue = false
118+
119+
if groupSpec.GetVolumeAllowShrink() && group == api.ServerGroupDBServers && !m.Conditions.IsTrue(api.ConditionTypeMarkedToRemove) {
120+
plan = append(plan, api.NewAction(api.ActionTypeMarkToRemoveMember, group, m.ID))
121+
} else {
122+
log.Error().Str("server-group", group.AsRole()).Str("pvc-storage-size", volumeSize.String()).Str("requested-size", requestedSize.String()).
123+
Msg("Volume size should not shrink")
122124
}
123125
}
124126
}
@@ -153,21 +155,3 @@ func pvcResizePlan(log zerolog.Logger, group api.ServerGroup, groupSpec api.Serv
153155
return nil
154156
}
155157
}
156-
157-
func comparePVCResourceList(wanted, given core.ResourceList) bool {
158-
for k, v := range wanted {
159-
if gv, ok := given[k]; !ok {
160-
return true
161-
} else if v.Cmp(gv) != 0 {
162-
return true
163-
}
164-
}
165-
166-
for k := range given {
167-
if _, ok := wanted[k]; !ok {
168-
return true
169-
}
170-
}
171-
172-
return false
173-
}

pkg/deployment/reconcile/plan_builder_test.go

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -693,13 +693,7 @@ func TestCreatePlan(t *testing.T) {
693693
ad.Status.Members.DBServers[0].PersistentVolumeClaimName = pvcName
694694
},
695695
ExpectedPlan: []api.Action{
696-
api.NewAction(api.ActionTypeDisableClusterScaling, api.ServerGroupDBServers, ""),
697-
api.NewAction(api.ActionTypeAddMember, api.ServerGroupDBServers, ""),
698-
api.NewAction(api.ActionTypeWaitForMemberUp, api.ServerGroupDBServers, ""),
699-
api.NewAction(api.ActionTypeCleanOutMember, api.ServerGroupDBServers, ""),
700-
api.NewAction(api.ActionTypeShutdownMember, api.ServerGroupDBServers, ""),
701-
api.NewAction(api.ActionTypeRemoveMember, api.ServerGroupDBServers, ""),
702-
api.NewAction(api.ActionTypeEnableClusterScaling, api.ServerGroupDBServers, ""),
696+
api.NewAction(api.ActionTypeMarkToRemoveMember, api.ServerGroupDBServers, ""),
703697
},
704698
ExpectedLog: "Storage class has changed - pod needs replacement",
705699
},

pkg/deployment/reconcile/plan_executor.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,20 @@ func (d *Reconciler) executePlan(ctx context.Context, cachedStatus inspectorInte
171171
} else {
172172
plan = nil
173173
}
174+
175+
if getActionReloadCachedStatus(action) {
176+
log.Info().Msgf("Reloading cached status")
177+
if err := cachedStatus.Refresh(ctx); err != nil {
178+
log.Warn().Err(err).Msgf("Unable to reload cached status")
179+
return plan, recall, nil
180+
}
181+
}
182+
183+
if newPlan := getActionPlanAppender(action, plan); !newPlan.Equal(plan) {
184+
// Our actions have been added to the end of plan
185+
log.Info().Msgf("Appending new plan items")
186+
return newPlan, true, nil
187+
}
174188
} else {
175189
if plan[0].StartTime.IsZero() {
176190
now := metav1.Now()

pkg/deployment/resources/pod_creator.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -632,6 +632,7 @@ func (r *Resources) createPodForMember(ctx context.Context, spec api.DeploymentS
632632
m.Conditions.Remove(api.ConditionTypePendingTLSRotation)
633633
m.Conditions.Remove(api.ConditionTypePendingRestart)
634634
m.Conditions.Remove(api.ConditionTypeRestart)
635+
m.Conditions.Remove(api.ConditionTypeCleanedOut)
635636

636637
m.Upgrade = false
637638
r.log.Info().Str("pod", m.PodName).Msgf("Updating member")
@@ -750,9 +751,6 @@ func (r *Resources) EnsurePods(ctx context.Context, cachedStatus inspectorInterf
750751
if m.Phase != api.MemberPhasePending {
751752
continue
752753
}
753-
if m.Conditions.IsTrue(api.ConditionTypeCleanedOut) {
754-
continue
755-
}
756754

757755
member, ok := cachedStatus.ArangoMember(m.ArangoMemberName(r.context.GetName(), group))
758756
if !ok {

0 commit comments

Comments
 (0)