Skip to content

Commit f368378

Browse files
authored
Merge pull request #2477 from k8s-infra-cherrypick-robot/cherry-pick-2475-to-release-0.12
[release-0.12] Fix panic when OpenStack server is deleted by an external agent
2 parents 49b1177 + c59bac1 commit f368378

File tree

7 files changed

+256
-21
lines changed

7 files changed

+256
-21
lines changed

api/v1beta1/conditions_consts.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ const (
4444
OpenStackErrorReason = "OpenStackError"
4545
// DependencyFailedReason indicates that a dependent object failed.
4646
DependencyFailedReason = "DependencyFailed"
47+
48+
// ServerUnexpectedDeletedMessage is the message used when the server is unexpectedly deleted by an external agent.
49+
ServerUnexpectedDeletedMessage = "The server was unexpectedly deleted"
4750
)
4851

4952
const (

controllers/openstackmachine_controller.go

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,11 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
365365
return ctrl.Result{}, err
366366
}
367367

368+
result := r.reconcileMachineState(scope, openStackMachine, machine, machineServer)
369+
if result != nil {
370+
return *result, nil
371+
}
372+
368373
computeService, err := compute.NewService(scope)
369374
if err != nil {
370375
return ctrl.Result{}, err
@@ -377,6 +382,12 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
377382
return ctrl.Result{}, err
378383
}
379384

385+
if instanceStatus == nil {
386+
msg := "server has been unexpectedly deleted"
387+
conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceDeletedReason, clusterv1.ConditionSeverityError, msg)
388+
openStackMachine.SetFailure(capoerrors.DeprecatedCAPIUpdateMachineError, errors.New(msg))
389+
}
390+
380391
instanceNS, err := instanceStatus.NetworkStatus()
381392
if err != nil {
382393
return ctrl.Result{}, fmt.Errorf("get network status: %w", err)
@@ -393,22 +404,15 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
393404
})
394405
openStackMachine.Status.Addresses = addresses
395406

396-
result := r.reconcileMachineState(scope, openStackMachine, machine, machineServer)
397-
if result != nil {
398-
return *result, nil
399-
}
400-
401-
if !util.IsControlPlaneMachine(machine) {
402-
scope.Logger().Info("Not a Control plane machine, no floating ip reconcile needed, Reconciled Machine create successfully")
403-
return ctrl.Result{}, nil
404-
}
407+
if util.IsControlPlaneMachine(machine) {
408+
err = r.reconcileAPIServerLoadBalancer(scope, openStackCluster, openStackMachine, instanceStatus, instanceNS, clusterResourceName)
409+
if err != nil {
410+
return ctrl.Result{}, err
411+
}
405412

406-
err = r.reconcileAPIServerLoadBalancer(scope, openStackCluster, openStackMachine, instanceStatus, instanceNS, clusterResourceName)
407-
if err != nil {
408-
return ctrl.Result{}, err
413+
conditions.MarkTrue(openStackMachine, infrav1.APIServerIngressReadyCondition)
409414
}
410415

411-
conditions.MarkTrue(openStackMachine, infrav1.APIServerIngressReadyCondition)
412416
scope.Logger().Info("Reconciled Machine create successfully")
413417
return ctrl.Result{}, nil
414418
}

controllers/openstackserver_controller.go

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -433,9 +433,18 @@ func (r *OpenStackServerReconciler) getOrCreateServer(ctx context.Context, logge
433433

434434
if openStackServer.Status.InstanceID != nil {
435435
instanceStatus, err = computeService.GetInstanceStatus(*openStackServer.Status.InstanceID)
436-
if err != nil {
437-
logger.Info("Unable to get OpenStack instance", "name", openStackServer.Name)
438-
conditions.MarkFalse(openStackServer, infrav1.InstanceReadyCondition, infrav1.OpenStackErrorReason, clusterv1.ConditionSeverityError, "%s", err.Error())
436+
if err != nil || instanceStatus == nil {
437+
logger.Info("Unable to get OpenStack instance", "name", openStackServer.Name, "id", *openStackServer.Status.InstanceID)
438+
var msg string
439+
var reason string
440+
if err != nil {
441+
msg = err.Error()
442+
reason = infrav1.OpenStackErrorReason
443+
} else {
444+
msg = infrav1.ServerUnexpectedDeletedMessage
445+
reason = infrav1.InstanceNotFoundReason
446+
}
447+
conditions.MarkFalse(openStackServer, infrav1.InstanceReadyCondition, reason, clusterv1.ConditionSeverityError, msg)
439448
return nil, err
440449
}
441450
}
@@ -450,11 +459,6 @@ func (r *OpenStackServerReconciler) getOrCreateServer(ctx context.Context, logge
450459
logger.Info("Server already exists", "name", openStackServer.Name, "id", instanceStatus.ID())
451460
return instanceStatus, nil
452461
}
453-
if openStackServer.Status.InstanceID != nil {
454-
logger.Info("Not reconciling server in failed state. The previously existing OpenStack instance is no longer available")
455-
conditions.MarkFalse(openStackServer, infrav1.InstanceReadyCondition, infrav1.InstanceNotFoundReason, clusterv1.ConditionSeverityError, "virtual machine no longer exists")
456-
return nil, nil
457-
}
458462

459463
logger.Info("Server does not exist, creating Server", "name", openStackServer.Name)
460464
instanceSpec, err := r.serverToInstanceSpec(ctx, openStackServer)
@@ -468,6 +472,8 @@ func (r *OpenStackServerReconciler) getOrCreateServer(ctx context.Context, logge
468472
openStackServer.Status.InstanceState = &infrav1.InstanceStateError
469473
return nil, fmt.Errorf("create OpenStack instance: %w", err)
470474
}
475+
// We reached a point where a server was created with no error, but we can't predict its state yet, which is why we don't update conditions yet.
476+
// The actual state of the server is checked in the next reconcile loops.
471477
return instanceStatus, nil
472478
}
473479
return instanceStatus, nil

controllers/openstackserver_controller_test.go

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,11 @@ import (
3131
"github.com/gophercloud/gophercloud/v2/openstack/networking/v2/ports"
3232
. "github.com/onsi/gomega" //nolint:revive
3333
"go.uber.org/mock/gomock"
34+
corev1 "k8s.io/api/core/v1"
3435
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3536
"k8s.io/utils/ptr"
37+
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
38+
"sigs.k8s.io/cluster-api/util/conditions"
3639

3740
infrav1alpha1 "sigs.k8s.io/cluster-api-provider-openstack/api/v1alpha1"
3841
infrav1 "sigs.k8s.io/cluster-api-provider-openstack/api/v1beta1"
@@ -548,3 +551,184 @@ func Test_OpenStackServerReconcileCreate(t *testing.T) {
548551
})
549552
}
550553
}
554+
555+
func TestOpenStackServerReconciler_getOrCreateServer(t *testing.T) {
556+
tests := []struct {
557+
name string
558+
openStackServer *infrav1alpha1.OpenStackServer
559+
setupMocks func(r *recorders)
560+
wantServer *servers.Server
561+
wantErr bool
562+
wantCondition *clusterv1.Condition
563+
}{
564+
{
565+
name: "instanceID set in status but server not found",
566+
openStackServer: &infrav1alpha1.OpenStackServer{
567+
Status: infrav1alpha1.OpenStackServerStatus{
568+
InstanceID: ptr.To(instanceUUID),
569+
},
570+
},
571+
setupMocks: func(r *recorders) {
572+
r.compute.GetServer(instanceUUID).Return(nil, gophercloud.ErrUnexpectedResponseCode{Actual: 404})
573+
},
574+
wantErr: false,
575+
wantCondition: &clusterv1.Condition{
576+
Type: infrav1.InstanceReadyCondition,
577+
Status: corev1.ConditionFalse,
578+
Reason: infrav1.InstanceNotFoundReason,
579+
Message: infrav1.ServerUnexpectedDeletedMessage,
580+
},
581+
},
582+
{
583+
name: "instanceID set in status but server not found with error",
584+
openStackServer: &infrav1alpha1.OpenStackServer{
585+
Status: infrav1alpha1.OpenStackServerStatus{
586+
InstanceID: ptr.To(instanceUUID),
587+
},
588+
},
589+
setupMocks: func(r *recorders) {
590+
r.compute.GetServer(instanceUUID).Return(nil, fmt.Errorf("error"))
591+
},
592+
wantErr: true,
593+
wantCondition: &clusterv1.Condition{
594+
Type: infrav1.InstanceReadyCondition,
595+
Status: corev1.ConditionFalse,
596+
Reason: infrav1.OpenStackErrorReason,
597+
Message: "get server \"" + instanceUUID + "\" detail failed: error",
598+
},
599+
},
600+
{
601+
name: "instanceStatus is nil but server found with machine name",
602+
openStackServer: &infrav1alpha1.OpenStackServer{
603+
ObjectMeta: metav1.ObjectMeta{
604+
Name: openStackServerName,
605+
},
606+
Status: infrav1alpha1.OpenStackServerStatus{},
607+
},
608+
setupMocks: func(r *recorders) {
609+
r.compute.ListServers(servers.ListOpts{
610+
Name: "^" + openStackServerName + "$",
611+
}).Return([]servers.Server{{ID: instanceUUID}}, nil)
612+
},
613+
wantErr: false,
614+
wantServer: &servers.Server{
615+
ID: instanceUUID,
616+
},
617+
},
618+
{
619+
name: "instanceStatus is nil and server not found and then created",
620+
openStackServer: &infrav1alpha1.OpenStackServer{
621+
ObjectMeta: metav1.ObjectMeta{
622+
Name: openStackServerName,
623+
},
624+
Status: infrav1alpha1.OpenStackServerStatus{
625+
Resolved: &infrav1alpha1.ResolvedServerSpec{
626+
ImageID: imageUUID,
627+
FlavorID: flavorUUID,
628+
Ports: defaultResolvedPorts,
629+
},
630+
},
631+
},
632+
setupMocks: func(r *recorders) {
633+
r.compute.ListServers(servers.ListOpts{
634+
Name: "^" + openStackServerName + "$",
635+
}).Return([]servers.Server{}, nil)
636+
r.compute.CreateServer(gomock.Any(), gomock.Any()).Return(&servers.Server{ID: instanceUUID}, nil)
637+
},
638+
wantErr: false,
639+
wantServer: &servers.Server{
640+
ID: instanceUUID,
641+
},
642+
// No condition is set here because the server creation was only
643+
// kicked off; we don't know the result yet in this function.
644+
},
645+
{
646+
name: "instanceStatus is nil and server not found and then created with error",
647+
openStackServer: &infrav1alpha1.OpenStackServer{
648+
ObjectMeta: metav1.ObjectMeta{
649+
Name: openStackServerName,
650+
},
651+
Status: infrav1alpha1.OpenStackServerStatus{
652+
Resolved: &infrav1alpha1.ResolvedServerSpec{
653+
ImageID: imageUUID,
654+
FlavorID: flavorUUID,
655+
Ports: defaultResolvedPorts,
656+
},
657+
},
658+
},
659+
setupMocks: func(r *recorders) {
660+
r.compute.ListServers(servers.ListOpts{
661+
Name: "^" + openStackServerName + "$",
662+
}).Return([]servers.Server{}, nil)
663+
r.compute.CreateServer(gomock.Any(), gomock.Any()).Return(nil, fmt.Errorf("error"))
664+
},
665+
wantErr: true,
666+
wantCondition: &clusterv1.Condition{
667+
Type: infrav1.InstanceReadyCondition,
668+
Status: corev1.ConditionFalse,
669+
Reason: infrav1.InstanceCreateFailedReason,
670+
Message: "error creating Openstack instance: " + "error",
671+
},
672+
},
673+
}
674+
675+
for _, tt := range tests {
676+
t.Run(tt.name, func(t *testing.T) {
677+
g := NewGomegaWithT(t)
678+
log := testr.New(t)
679+
680+
mockCtrl := gomock.NewController(t)
681+
defer mockCtrl.Finish()
682+
683+
mockScopeFactory := scope.NewMockScopeFactory(mockCtrl, "")
684+
scopeWithLogger := scope.NewWithLogger(mockScopeFactory, log)
685+
686+
computeRecorder := mockScopeFactory.ComputeClient.EXPECT()
687+
imageRecorder := mockScopeFactory.ImageClient.EXPECT()
688+
networkRecorder := mockScopeFactory.NetworkClient.EXPECT()
689+
volumeRecorder := mockScopeFactory.VolumeClient.EXPECT()
690+
691+
recorders := &recorders{
692+
compute: computeRecorder,
693+
image: imageRecorder,
694+
network: networkRecorder,
695+
volume: volumeRecorder,
696+
}
697+
698+
if tt.setupMocks != nil {
699+
tt.setupMocks(recorders)
700+
}
701+
702+
computeService, err := compute.NewService(scopeWithLogger)
703+
g.Expect(err).ToNot(HaveOccurred())
704+
705+
reconciler := OpenStackServerReconciler{}
706+
status, err := reconciler.getOrCreateServer(ctx, log, tt.openStackServer, computeService, []string{portUUID})
707+
708+
// Check error result
709+
if tt.wantErr {
710+
g.Expect(err).To(HaveOccurred())
711+
} else {
712+
g.Expect(err).ToNot(HaveOccurred())
713+
}
714+
715+
// Check instance status
716+
if tt.wantServer != nil {
717+
g.Expect(status.ID()).To(Equal(tt.wantServer.ID))
718+
}
719+
720+
// Check the condition is set correctly
721+
if tt.wantCondition != nil {
722+
// print openstackServer conditions
723+
for _, condition := range tt.openStackServer.Status.Conditions {
724+
t.Logf("Condition: %s, Status: %s, Reason: %s", condition.Type, condition.Status, condition.Reason)
725+
}
726+
conditionType := conditions.Get(tt.openStackServer, tt.wantCondition.Type)
727+
g.Expect(conditionType).ToNot(BeNil())
728+
g.Expect(conditionType.Status).To(Equal(tt.wantCondition.Status))
729+
g.Expect(conditionType.Reason).To(Equal(tt.wantCondition.Reason))
730+
g.Expect(conditionType.Message).To(Equal(tt.wantCondition.Message))
731+
}
732+
})
733+
}
734+
}

test/e2e/data/e2e_conf.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ intervals:
201201
default/wait-control-plane: ["30m", "10s"]
202202
default/wait-worker-nodes: ["30m", "10s"]
203203
default/wait-delete-cluster: ["5m", "10s"]
204+
default/wait-delete-machine: ["30m", "10s"]
204205
default/wait-alt-az: ["20m", "30s"]
205206
default/wait-machine-upgrade: ["30m", "10s"]
206207
default/wait-nodes-ready: ["15m", "10s"]

test/e2e/shared/openstack.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,21 @@ func CreateOpenStackNetwork(e2eCtx *E2EContext, name, cidr string) (*networks.Ne
846846
return net, nil
847847
}
848848

849+
func DeleteOpenStackServer(ctx context.Context, e2eCtx *E2EContext, id string) error {
850+
providerClient, clientOpts, _, err := GetTenantProviderClient(e2eCtx)
851+
if err != nil {
852+
_, _ = fmt.Fprintf(GinkgoWriter, "error creating provider client: %s\n", err)
853+
return err
854+
}
855+
856+
computeClient, err := openstack.NewComputeV2(providerClient, gophercloud.EndpointOpts{Region: clientOpts.RegionName})
857+
if err != nil {
858+
return fmt.Errorf("error creating compute client: %s", err)
859+
}
860+
861+
return servers.Delete(ctx, computeClient, id).ExtractErr()
862+
}
863+
849864
func DeleteOpenStackNetwork(ctx context.Context, e2eCtx *E2EContext, id string) error {
850865
providerClient, clientOpts, _, err := GetTenantProviderClient(e2eCtx)
851866
if err != nil {

test/e2e/suites/e2e/e2e_test.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,6 +1006,28 @@ var _ = Describe("e2e tests [PR-Blocking]", func() {
10061006
Expect(additionalVolume.AvailabilityZone).To(Equal(failureDomain))
10071007
Expect(additionalVolume.VolumeType).To(Equal(volumeTypeAlt))
10081008
}
1009+
1010+
// This last block tests a scenario where an external agent deletes a server (e.g. a user via Horizon).
1011+
// We want to ensure that the OpenStackMachine conditions are updated to reflect the server deletion.
1012+
// Context: https://github.com/kubernetes-sigs/cluster-api-provider-openstack/issues/2474
1013+
shared.Logf("Deleting a server")
1014+
serverToDelete := allServers[controlPlaneMachines[0].Spec.InfrastructureRef.Name]
1015+
err = shared.DeleteOpenStackServer(ctx, e2eCtx, serverToDelete.ID)
1016+
Expect(err).NotTo(HaveOccurred())
1017+
shared.Logf("Waiting for the OpenStackMachine to have a condition that the server has been unexpectedly deleted")
1018+
Eventually(func() bool {
1019+
openStackMachine := &infrav1.OpenStackMachine{}
1020+
err := e2eCtx.Environment.BootstrapClusterProxy.GetClient().Get(ctx, crclient.ObjectKey{Name: controlPlaneMachines[0].Name, Namespace: controlPlaneMachines[0].Namespace}, openStackMachine)
1021+
if err != nil {
1022+
return false
1023+
}
1024+
for _, condition := range openStackMachine.Status.Conditions {
1025+
if condition.Type == infrav1.InstanceReadyCondition && condition.Status == corev1.ConditionFalse && condition.Reason == infrav1.InstanceDeletedReason && condition.Message == "server has been unexpectedly deleted" {
1026+
return true
1027+
}
1028+
}
1029+
return false
1030+
}, e2eCtx.E2EConfig.GetIntervals(specName, "wait-delete-machine")...).Should(BeTrue())
10091031
})
10101032
})
10111033
})

0 commit comments

Comments
 (0)