diff --git a/api/v1beta1/conditions_consts.go b/api/v1beta1/conditions_consts.go index ec439ca5ae..32ddac8798 100644 --- a/api/v1beta1/conditions_consts.go +++ b/api/v1beta1/conditions_consts.go @@ -44,6 +44,9 @@ const ( OpenStackErrorReason = "OpenStackError" // DependencyFailedReason indicates that a dependent object failed. DependencyFailedReason = "DependencyFailed" + + // ServerUnexpectedDeletedMessage is the message used when the server is unexpectedly deleted via an external agent. + ServerUnexpectedDeletedMessage = "The server was unexpectedly deleted" ) const ( diff --git a/controllers/openstackmachine_controller.go b/controllers/openstackmachine_controller.go index 1e0a4d573e..da240a9993 100644 --- a/controllers/openstackmachine_controller.go +++ b/controllers/openstackmachine_controller.go @@ -365,6 +365,11 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope return ctrl.Result{}, err } + result := r.reconcileMachineState(scope, openStackMachine, machine, machineServer) + if result != nil { + return *result, nil + } + computeService, err := compute.NewService(scope) if err != nil { return ctrl.Result{}, err } @@ -377,6 +382,13 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope return ctrl.Result{}, err } + if instanceStatus == nil { + msg := "server has been unexpectedly deleted" + conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceDeletedReason, clusterv1.ConditionSeverityError, "%s", msg) + openStackMachine.SetFailure(capoerrors.DeprecatedCAPIUpdateMachineError, errors.New(msg)) + return ctrl.Result{}, nil + } + instanceNS, err := instanceStatus.NetworkStatus() if err != nil { return ctrl.Result{}, fmt.Errorf("get network status: %w", err) } @@ -393,22 +405,15 @@ }) openStackMachine.Status.Addresses = addresses - result := r.reconcileMachineState(scope, openStackMachine, machine, machineServer) - if result != nil { 
- return *result, nil - } - - if !util.IsControlPlaneMachine(machine) { - scope.Logger().Info("Not a Control plane machine, no floating ip reconcile needed, Reconciled Machine create successfully") - return ctrl.Result{}, nil - } + if util.IsControlPlaneMachine(machine) { + err = r.reconcileAPIServerLoadBalancer(scope, openStackCluster, openStackMachine, instanceStatus, instanceNS, clusterResourceName) + if err != nil { + return ctrl.Result{}, err + } - err = r.reconcileAPIServerLoadBalancer(scope, openStackCluster, openStackMachine, instanceStatus, instanceNS, clusterResourceName) - if err != nil { - return ctrl.Result{}, err + conditions.MarkTrue(openStackMachine, infrav1.APIServerIngressReadyCondition) } - conditions.MarkTrue(openStackMachine, infrav1.APIServerIngressReadyCondition) scope.Logger().Info("Reconciled Machine create successfully") return ctrl.Result{}, nil } diff --git a/controllers/openstackserver_controller.go b/controllers/openstackserver_controller.go index f5061462fe..f7b590d3d7 100644 --- a/controllers/openstackserver_controller.go +++ b/controllers/openstackserver_controller.go @@ -433,9 +433,18 @@ func (r *OpenStackServerReconciler) getOrCreateServer(ctx context.Context, logge if openStackServer.Status.InstanceID != nil { instanceStatus, err = computeService.GetInstanceStatus(*openStackServer.Status.InstanceID) - if err != nil { - logger.Info("Unable to get OpenStack instance", "name", openStackServer.Name) - conditions.MarkFalse(openStackServer, infrav1.InstanceReadyCondition, infrav1.OpenStackErrorReason, clusterv1.ConditionSeverityError, "%s", err.Error()) + if err != nil || instanceStatus == nil { + logger.Info("Unable to get OpenStack instance", "name", openStackServer.Name, "id", *openStackServer.Status.InstanceID) + var msg string + var reason string + if err != nil { + msg = err.Error() + reason = infrav1.OpenStackErrorReason + } else { + msg = infrav1.ServerUnexpectedDeletedMessage + reason = infrav1.InstanceNotFoundReason + } + 
conditions.MarkFalse(openStackServer, infrav1.InstanceReadyCondition, reason, clusterv1.ConditionSeverityError, "%s", msg) return nil, err } } @@ -450,11 +459,6 @@ func (r *OpenStackServerReconciler) getOrCreateServer(ctx context.Context, logge logger.Info("Server already exists", "name", openStackServer.Name, "id", instanceStatus.ID()) return instanceStatus, nil } - if openStackServer.Status.InstanceID != nil { - logger.Info("Not reconciling server in failed state. The previously existing OpenStack instance is no longer available") - conditions.MarkFalse(openStackServer, infrav1.InstanceReadyCondition, infrav1.InstanceNotFoundReason, clusterv1.ConditionSeverityError, "virtual machine no longer exists") - return nil, nil - } logger.Info("Server does not exist, creating Server", "name", openStackServer.Name) instanceSpec, err := r.serverToInstanceSpec(ctx, openStackServer) @@ -468,6 +472,8 @@ func (r *OpenStackServerReconciler) getOrCreateServer(ctx context.Context, logge openStackServer.Status.InstanceState = &infrav1.InstanceStateError return nil, fmt.Errorf("create OpenStack instance: %w", err) } + // We reached a point where a server was created with no error but we can't predict its state yet which is why we don't update conditions yet. + // The actual state of the server is checked in the next reconcile loops. return instanceStatus, nil } return instanceStatus, nil diff --git a/controllers/openstackserver_controller_test.go b/controllers/openstackserver_controller_test.go index deefefc1ae..317527461a 100644 --- a/controllers/openstackserver_controller_test.go +++ b/controllers/openstackserver_controller_test.go @@ -31,8 +31,11 @@ import ( "github.com/gophercloud/gophercloud/v2/openstack/networking/v2/ports" . 
"github.com/onsi/gomega" //nolint:revive "go.uber.org/mock/gomock" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/ptr" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util/conditions" infrav1alpha1 "sigs.k8s.io/cluster-api-provider-openstack/api/v1alpha1" infrav1 "sigs.k8s.io/cluster-api-provider-openstack/api/v1beta1" @@ -548,3 +551,184 @@ func Test_OpenStackServerReconcileCreate(t *testing.T) { }) } } + +func TestOpenStackServerReconciler_getOrCreateServer(t *testing.T) { + tests := []struct { + name string + openStackServer *infrav1alpha1.OpenStackServer + setupMocks func(r *recorders) + wantServer *servers.Server + wantErr bool + wantCondition *clusterv1.Condition + }{ + { + name: "instanceID set in status but server not found", + openStackServer: &infrav1alpha1.OpenStackServer{ + Status: infrav1alpha1.OpenStackServerStatus{ + InstanceID: ptr.To(instanceUUID), + }, + }, + setupMocks: func(r *recorders) { + r.compute.GetServer(instanceUUID).Return(nil, gophercloud.ErrUnexpectedResponseCode{Actual: 404}) + }, + wantErr: false, + wantCondition: &clusterv1.Condition{ + Type: infrav1.InstanceReadyCondition, + Status: corev1.ConditionFalse, + Reason: infrav1.InstanceNotFoundReason, + Message: infrav1.ServerUnexpectedDeletedMessage, + }, + }, + { + name: "instanceID set in status but server not found with error", + openStackServer: &infrav1alpha1.OpenStackServer{ + Status: infrav1alpha1.OpenStackServerStatus{ + InstanceID: ptr.To(instanceUUID), + }, + }, + setupMocks: func(r *recorders) { + r.compute.GetServer(instanceUUID).Return(nil, fmt.Errorf("error")) + }, + wantErr: true, + wantCondition: &clusterv1.Condition{ + Type: infrav1.InstanceReadyCondition, + Status: corev1.ConditionFalse, + Reason: infrav1.OpenStackErrorReason, + Message: "get server \"" + instanceUUID + "\" detail failed: error", + }, + }, + { + name: "instanceStatus is nil but server found with machine name", + openStackServer: 
&infrav1alpha1.OpenStackServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: openStackServerName, + }, + Status: infrav1alpha1.OpenStackServerStatus{}, + }, + setupMocks: func(r *recorders) { + r.compute.ListServers(servers.ListOpts{ + Name: "^" + openStackServerName + "$", + }).Return([]servers.Server{{ID: instanceUUID}}, nil) + }, + wantErr: false, + wantServer: &servers.Server{ + ID: instanceUUID, + }, + }, + { + name: "instanceStatus is nil and server not found and then created", + openStackServer: &infrav1alpha1.OpenStackServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: openStackServerName, + }, + Status: infrav1alpha1.OpenStackServerStatus{ + Resolved: &infrav1alpha1.ResolvedServerSpec{ + ImageID: imageUUID, + FlavorID: flavorUUID, + Ports: defaultResolvedPorts, + }, + }, + }, + setupMocks: func(r *recorders) { + r.compute.ListServers(servers.ListOpts{ + Name: "^" + openStackServerName + "$", + }).Return([]servers.Server{}, nil) + r.compute.CreateServer(gomock.Any(), gomock.Any()).Return(&servers.Server{ID: instanceUUID}, nil) + }, + wantErr: false, + wantServer: &servers.Server{ + ID: instanceUUID, + }, + // It's off but no condition is set because the server creation was kicked off but we + // don't know the result yet in this function. 
+ }, + { + name: "instanceStatus is nil and server not found and then created with error", + openStackServer: &infrav1alpha1.OpenStackServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: openStackServerName, + }, + Status: infrav1alpha1.OpenStackServerStatus{ + Resolved: &infrav1alpha1.ResolvedServerSpec{ + ImageID: imageUUID, + FlavorID: flavorUUID, + Ports: defaultResolvedPorts, + }, + }, + }, + setupMocks: func(r *recorders) { + r.compute.ListServers(servers.ListOpts{ + Name: "^" + openStackServerName + "$", + }).Return([]servers.Server{}, nil) + r.compute.CreateServer(gomock.Any(), gomock.Any()).Return(nil, fmt.Errorf("error")) + }, + wantErr: true, + wantCondition: &clusterv1.Condition{ + Type: infrav1.InstanceReadyCondition, + Status: corev1.ConditionFalse, + Reason: infrav1.InstanceCreateFailedReason, + Message: "error creating Openstack instance: " + "error", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewGomegaWithT(t) + log := testr.New(t) + + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + mockScopeFactory := scope.NewMockScopeFactory(mockCtrl, "") + scopeWithLogger := scope.NewWithLogger(mockScopeFactory, log) + + computeRecorder := mockScopeFactory.ComputeClient.EXPECT() + imageRecorder := mockScopeFactory.ImageClient.EXPECT() + networkRecorder := mockScopeFactory.NetworkClient.EXPECT() + volumeRecorder := mockScopeFactory.VolumeClient.EXPECT() + + recorders := &recorders{ + compute: computeRecorder, + image: imageRecorder, + network: networkRecorder, + volume: volumeRecorder, + } + + if tt.setupMocks != nil { + tt.setupMocks(recorders) + } + + computeService, err := compute.NewService(scopeWithLogger) + g.Expect(err).ToNot(HaveOccurred()) + + reconciler := OpenStackServerReconciler{} + status, err := reconciler.getOrCreateServer(ctx, log, tt.openStackServer, computeService, []string{portUUID}) + + // Check error result + if tt.wantErr { + g.Expect(err).To(HaveOccurred()) + } else { + 
g.Expect(err).ToNot(HaveOccurred()) + } + + // Check instance status + if tt.wantServer != nil { + g.Expect(status.ID()).To(Equal(tt.wantServer.ID)) + } + + // Check the condition is set correctly + if tt.wantCondition != nil { + // print openstackServer conditions + for _, condition := range tt.openStackServer.Status.Conditions { + t.Logf("Condition: %s, Status: %s, Reason: %s", condition.Type, condition.Status, condition.Reason) + } + conditionType := conditions.Get(tt.openStackServer, tt.wantCondition.Type) + g.Expect(conditionType).ToNot(BeNil()) + g.Expect(conditionType.Status).To(Equal(tt.wantCondition.Status)) + g.Expect(conditionType.Reason).To(Equal(tt.wantCondition.Reason)) + g.Expect(conditionType.Message).To(Equal(tt.wantCondition.Message)) + } + }) + } +} diff --git a/test/e2e/data/e2e_conf.yaml b/test/e2e/data/e2e_conf.yaml index be1be4042c..53245e05b1 100644 --- a/test/e2e/data/e2e_conf.yaml +++ b/test/e2e/data/e2e_conf.yaml @@ -201,6 +201,7 @@ intervals: default/wait-control-plane: ["30m", "10s"] default/wait-worker-nodes: ["30m", "10s"] default/wait-delete-cluster: ["5m", "10s"] + default/wait-delete-machine: ["30m", "10s"] default/wait-alt-az: ["20m", "30s"] default/wait-machine-upgrade: ["30m", "10s"] default/wait-nodes-ready: ["15m", "10s"] diff --git a/test/e2e/shared/openstack.go b/test/e2e/shared/openstack.go index 00eccc2f91..b25aef7408 100644 --- a/test/e2e/shared/openstack.go +++ b/test/e2e/shared/openstack.go @@ -846,6 +846,21 @@ func CreateOpenStackNetwork(e2eCtx *E2EContext, name, cidr string) (*networks.Ne return net, nil } +func DeleteOpenStackServer(ctx context.Context, e2eCtx *E2EContext, id string) error { + providerClient, clientOpts, _, err := GetTenantProviderClient(e2eCtx) + if err != nil { + _, _ = fmt.Fprintf(GinkgoWriter, "error creating provider client: %s\n", err) + return err + } + + computeClient, err := openstack.NewComputeV2(providerClient, gophercloud.EndpointOpts{Region: clientOpts.RegionName}) + if err != nil { + 
return fmt.Errorf("error creating compute client: %s", err) + } + + return servers.Delete(ctx, computeClient, id).ExtractErr() +} + func DeleteOpenStackNetwork(ctx context.Context, e2eCtx *E2EContext, id string) error { providerClient, clientOpts, _, err := GetTenantProviderClient(e2eCtx) if err != nil { diff --git a/test/e2e/suites/e2e/e2e_test.go b/test/e2e/suites/e2e/e2e_test.go index cb55b1b028..4d87de313b 100644 --- a/test/e2e/suites/e2e/e2e_test.go +++ b/test/e2e/suites/e2e/e2e_test.go @@ -1006,6 +1006,28 @@ var _ = Describe("e2e tests [PR-Blocking]", func() { Expect(additionalVolume.AvailabilityZone).To(Equal(failureDomain)) Expect(additionalVolume.VolumeType).To(Equal(volumeTypeAlt)) } + + // This last block tests a scenario where an external agent deletes a server (e.g. a user via Horizon). + // We want to ensure that the OpenStackMachine conditions are updated to reflect the server deletion. + // Context: https://github.com/kubernetes-sigs/cluster-api-provider-openstack/issues/2474 + shared.Logf("Deleting a server") + serverToDelete := allServers[controlPlaneMachines[0].Spec.InfrastructureRef.Name] + err = shared.DeleteOpenStackServer(ctx, e2eCtx, serverToDelete.ID) + Expect(err).NotTo(HaveOccurred()) + shared.Logf("Waiting for the OpenStackMachine to have a condition that the server has been unexpectedly deleted") + Eventually(func() bool { + openStackMachine := &infrav1.OpenStackMachine{} + err := e2eCtx.Environment.BootstrapClusterProxy.GetClient().Get(ctx, crclient.ObjectKey{Name: controlPlaneMachines[0].Name, Namespace: controlPlaneMachines[0].Namespace}, openStackMachine) + if err != nil { + return false + } + for _, condition := range openStackMachine.Status.Conditions { + if condition.Type == infrav1.InstanceReadyCondition && condition.Status == corev1.ConditionFalse && condition.Reason == infrav1.InstanceDeletedReason && condition.Message == "server has been unexpectedly deleted" { + return true + } + } + return false + }, 
e2eCtx.E2EConfig.GetIntervals(specName, "wait-delete-machine")...).Should(BeTrue()) }) }) })