Skip to content

Commit a43f0de

Browse files
qiujian16skeeey
andauthored
avoid cluster auto approve failed occasionally (#388) (#395)
Signed-off-by: Wei Liu <liuweixa@redhat.com> Co-authored-by: Wei Liu <liuweixa@redhat.com>
1 parent 8b1a2a0 commit a43f0de

File tree

8 files changed

+108
-53
lines changed

8 files changed

+108
-53
lines changed

.github/workflows/pre.yml

-8
Original file line numberDiff line numberDiff line change
@@ -62,14 +62,6 @@ jobs:
6262
go-version: ${{ env.GO_VERSION }}
6363
- name: unit
6464
run: make test
65-
- name: report coverage
66-
uses: codecov/codecov-action@v3
67-
with:
68-
files: ./coverage.out
69-
flags: unit
70-
name: unit
71-
verbose: true
72-
fail_ci_if_error: true
7365

7466
integration:
7567
name: integration

pkg/registration/hub/csr/controller_test.go

-2
Original file line numberDiff line numberDiff line change
@@ -212,8 +212,6 @@ func TestSync(t *testing.T) {
212212
NewCSRRenewalReconciler(kubeClient, recorder),
213213
NewCSRBootstrapReconciler(
214214
kubeClient,
215-
clusterClient,
216-
clusterInformerFactory.Cluster().V1().ManagedClusters().Lister(),
217215
c.approvalUsers,
218216
recorder,
219217
),

pkg/registration/hub/csr/reconciler.go

-35
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,11 @@ import (
1111
authorizationv1 "k8s.io/api/authorization/v1"
1212
certificatesv1 "k8s.io/api/certificates/v1"
1313
certificatesv1beta1 "k8s.io/api/certificates/v1beta1"
14-
"k8s.io/apimachinery/pkg/api/errors"
1514
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
16-
"k8s.io/apimachinery/pkg/types"
1715
"k8s.io/apimachinery/pkg/util/sets"
1816
"k8s.io/client-go/kubernetes"
1917
"k8s.io/klog/v2"
2018

21-
clusterclientset "open-cluster-management.io/api/client/cluster/clientset/versioned"
22-
clusterv1listers "open-cluster-management.io/api/client/cluster/listers/cluster/v1"
2319
clusterv1 "open-cluster-management.io/api/cluster/v1"
2420

2521
"open-cluster-management.io/ocm/pkg/registration/hub/user"
@@ -95,21 +91,15 @@ func (r *csrRenewalReconciler) Reconcile(ctx context.Context, csr csrInfo, appro
9591

9692
type csrBootstrapReconciler struct {
9793
kubeClient kubernetes.Interface
98-
clusterClient clusterclientset.Interface
99-
clusterLister clusterv1listers.ManagedClusterLister
10094
approvalUsers sets.Set[string]
10195
eventRecorder events.Recorder
10296
}
10397

10498
func NewCSRBootstrapReconciler(kubeClient kubernetes.Interface,
105-
clusterClient clusterclientset.Interface,
106-
clusterLister clusterv1listers.ManagedClusterLister,
10799
approvalUsers []string,
108100
recorder events.Recorder) Reconciler {
109101
return &csrBootstrapReconciler{
110102
kubeClient: kubeClient,
111-
clusterClient: clusterClient,
112-
clusterLister: clusterLister,
113103
approvalUsers: sets.New(approvalUsers...),
114104
eventRecorder: recorder.WithComponentSuffix("csr-approving-controller"),
115105
}
@@ -129,15 +119,6 @@ func (b *csrBootstrapReconciler) Reconcile(ctx context.Context, csr csrInfo, app
129119
return reconcileContinue, nil
130120
}
131121

132-
err := b.accpetCluster(ctx, clusterName)
133-
if errors.IsNotFound(err) {
134-
// Current spoke cluster not found, could have been deleted, do nothing.
135-
return reconcileStop, nil
136-
}
137-
if err != nil {
138-
return reconcileContinue, err
139-
}
140-
141122
if err := approveCSR(b.kubeClient); err != nil {
142123
return reconcileContinue, err
143124
}
@@ -146,22 +127,6 @@ func (b *csrBootstrapReconciler) Reconcile(ctx context.Context, csr csrInfo, app
146127
return reconcileStop, nil
147128
}
148129

149-
func (b *csrBootstrapReconciler) accpetCluster(ctx context.Context, managedClusterName string) error {
150-
managedCluster, err := b.clusterLister.Get(managedClusterName)
151-
if err != nil {
152-
return err
153-
}
154-
155-
if managedCluster.Spec.HubAcceptsClient {
156-
return nil
157-
}
158-
159-
patch := []byte("{\"spec\": {\"hubAcceptsClient\": true}}")
160-
_, err = b.clusterClient.ClusterV1().ManagedClusters().Patch(
161-
ctx, managedCluster.Name, types.MergePatchType, patch, metav1.PatchOptions{})
162-
return err
163-
}
164-
165130
// To validate a managed cluster csr, we check
166131
// 1. if the signer name in csr request is valid.
167132
// 2. if organization field and commonName field in csr request is valid.

pkg/registration/hub/managedcluster/controller.go

+28
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package managedcluster
33
import (
44
"context"
55
"fmt"
6+
"time"
67

78
"github.com/openshift/library-go/pkg/controller/factory"
89
"github.com/openshift/library-go/pkg/operator/events"
@@ -12,6 +13,7 @@ import (
1213
"k8s.io/apimachinery/pkg/api/errors"
1314
"k8s.io/apimachinery/pkg/api/meta"
1415
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
16+
"k8s.io/apimachinery/pkg/types"
1517
rbacv1informers "k8s.io/client-go/informers/rbac/v1"
1618
"k8s.io/client-go/kubernetes"
1719
"k8s.io/klog/v2"
@@ -20,14 +22,20 @@ import (
2022
informerv1 "open-cluster-management.io/api/client/cluster/informers/externalversions/cluster/v1"
2123
listerv1 "open-cluster-management.io/api/client/cluster/listers/cluster/v1"
2224
v1 "open-cluster-management.io/api/cluster/v1"
25+
ocmfeature "open-cluster-management.io/api/feature"
2326
"open-cluster-management.io/sdk-go/pkg/patcher"
2427

2528
"open-cluster-management.io/ocm/pkg/common/apply"
2629
"open-cluster-management.io/ocm/pkg/common/queue"
30+
"open-cluster-management.io/ocm/pkg/features"
2731
"open-cluster-management.io/ocm/pkg/registration/helpers"
2832
"open-cluster-management.io/ocm/pkg/registration/hub/manifests"
2933
)
3034

35+
// this is an internal annotation to indicate a managed cluster is already accepted automatically, it is not
36+
// expected to be changed or removed outside.
37+
const clusterAcceptedAnnotationKey = "open-cluster-management.io/automatically-accepted-on"
38+
3139
var staticFiles = []string{
3240
"rbac/managedcluster-clusterrole.yaml",
3341
"rbac/managedcluster-clusterrolebinding.yaml",
@@ -38,6 +46,7 @@ var staticFiles = []string{
3846
// managedClusterController reconciles instances of ManagedCluster on the hub.
3947
type managedClusterController struct {
4048
kubeClient kubernetes.Interface
49+
clusterClient clientset.Interface
4150
clusterLister listerv1.ManagedClusterLister
4251
applier *apply.PermissionApplier
4352
patcher patcher.Patcher[*v1.ManagedCluster, v1.ManagedClusterSpec, v1.ManagedClusterStatus]
@@ -56,6 +65,7 @@ func NewManagedClusterController(
5665
recorder events.Recorder) factory.Controller {
5766
c := &managedClusterController{
5867
kubeClient: kubeClient,
68+
clusterClient: clusterClient,
5969
clusterLister: clusterInformer.Lister(),
6070
applier: apply.NewPermissionApplier(
6171
kubeClient,
@@ -103,6 +113,14 @@ func (c *managedClusterController) sync(ctx context.Context, syncCtx factory.Syn
103113
}
104114

105115
if !managedCluster.Spec.HubAcceptsClient {
116+
// If the ManagedClusterAutoApproval feature is enabled, we automatically accept a cluster only
117+
// when it joins for the first time, afterwards users can deny it again.
118+
if features.HubMutableFeatureGate.Enabled(ocmfeature.ManagedClusterAutoApproval) {
119+
if _, ok := managedCluster.Annotations[clusterAcceptedAnnotationKey]; !ok {
120+
return c.acceptCluster(ctx, managedClusterName)
121+
}
122+
}
123+
106124
// Current spoke cluster is not accepted, do nothing.
107125
if !meta.IsStatusConditionTrue(managedCluster.Status.Conditions, v1.ManagedClusterConditionHubAccepted) {
108126
return nil
@@ -199,3 +217,13 @@ func (c *managedClusterController) removeManagedClusterResources(ctx context.Con
199217
}
200218
return operatorhelpers.NewMultiLineAggregate(errs)
201219
}
220+
221+
func (c *managedClusterController) acceptCluster(ctx context.Context, managedClusterName string) error {
222+
// TODO support patching both annotations and spec simultaneously in the patcher
223+
acceptedTime := time.Now()
224+
patch := fmt.Sprintf(`{"metadata":{"annotations":{"%s":"%s"}},"spec":{"hubAcceptsClient":true}}`,
225+
clusterAcceptedAnnotationKey, acceptedTime.Format(time.RFC3339))
226+
_, err := c.clusterClient.ClusterV1().ManagedClusters().Patch(ctx, managedClusterName,
227+
types.MergePatchType, []byte(patch), metav1.PatchOptions{})
228+
return err
229+
}

pkg/registration/hub/managedcluster/controller_test.go

+20-3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package managedcluster
33
import (
44
"context"
55
"encoding/json"
6+
"fmt"
67
"testing"
78
"time"
89

@@ -16,18 +17,21 @@ import (
1617
clusterfake "open-cluster-management.io/api/client/cluster/clientset/versioned/fake"
1718
clusterinformers "open-cluster-management.io/api/client/cluster/informers/externalversions"
1819
v1 "open-cluster-management.io/api/cluster/v1"
20+
ocmfeature "open-cluster-management.io/api/feature"
1921
"open-cluster-management.io/sdk-go/pkg/patcher"
2022

2123
"open-cluster-management.io/ocm/pkg/common/apply"
2224
testingcommon "open-cluster-management.io/ocm/pkg/common/testing"
25+
"open-cluster-management.io/ocm/pkg/features"
2326
testinghelpers "open-cluster-management.io/ocm/pkg/registration/helpers/testing"
2427
)
2528

2629
func TestSyncManagedCluster(t *testing.T) {
2730
cases := []struct {
28-
name string
29-
startingObjects []runtime.Object
30-
validateActions func(t *testing.T, actions []clienttesting.Action)
31+
name string
32+
autoApprovalEnabled bool
33+
startingObjects []runtime.Object
34+
validateActions func(t *testing.T, actions []clienttesting.Action)
3135
}{
3236
{
3337
name: "sync a deleted spoke cluster",
@@ -97,8 +101,18 @@ func TestSyncManagedCluster(t *testing.T) {
97101
testingcommon.AssertNoActions(t, actions)
98102
},
99103
},
104+
{
105+
name: "should accept the clusters when auto approval is enabled",
106+
autoApprovalEnabled: true,
107+
startingObjects: []runtime.Object{testinghelpers.NewManagedCluster()},
108+
validateActions: func(t *testing.T, actions []clienttesting.Action) {
109+
testingcommon.AssertActions(t, actions, "patch")
110+
},
111+
},
100112
}
101113

114+
features.HubMutableFeatureGate.Add(ocmfeature.DefaultHubRegistrationFeatureGates)
115+
102116
for _, c := range cases {
103117
t.Run(c.name, func(t *testing.T) {
104118
clusterClient := clusterfake.NewSimpleClientset(c.startingObjects...)
@@ -112,8 +126,11 @@ func TestSyncManagedCluster(t *testing.T) {
112126
}
113127
}
114128

129+
features.HubMutableFeatureGate.Set(fmt.Sprintf("%s=%v", ocmfeature.ManagedClusterAutoApproval, c.autoApprovalEnabled))
130+
115131
ctrl := managedClusterController{
116132
kubeClient,
133+
clusterClient,
117134
clusterInformerFactory.Cluster().V1().ManagedClusters().Lister(),
118135
apply.NewPermissionApplier(
119136
kubeClient,

pkg/registration/hub/manager.go

-2
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,6 @@ func (m *HubManagerOptions) RunControllerManagerWithInformers(
153153
if features.HubMutableFeatureGate.Enabled(ocmfeature.ManagedClusterAutoApproval) {
154154
csrReconciles = append(csrReconciles, csr.NewCSRBootstrapReconciler(
155155
kubeClient,
156-
clusterClient,
157-
clusterInformers.Cluster().V1().ManagedClusters().Lister(),
158156
m.ClusterAutoApprovalUsers,
159157
controllerContext.EventRecorder,
160158
))

test/integration/registration/clusterannotations_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ var _ = ginkgo.Describe("Cluster Annotations", func() {
4545
return err
4646
}
4747

48-
if len(mc.Annotations) != 1 {
49-
return fmt.Errorf("expected 1 annotation, got %d", len(mc.Annotations))
48+
if _, ok := mc.Annotations["foo"]; ok {
49+
return fmt.Errorf("unexpected annotations %v", mc.Annotations)
5050
}
5151

5252
if mc.Annotations["agent.open-cluster-management.io/foo"] != "bar" {

test/integration/registration/spokecluster_autoapproval_test.go

+58-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package registration_test
22

33
import (
4+
"context"
45
"fmt"
56
"path"
67
"time"
@@ -9,6 +10,7 @@ import (
910
"github.com/onsi/gomega"
1011
certificates "k8s.io/api/certificates/v1"
1112
"k8s.io/apimachinery/pkg/api/meta"
13+
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1214

1315
clusterv1 "open-cluster-management.io/api/cluster/v1"
1416

@@ -17,6 +19,8 @@ import (
1719
"open-cluster-management.io/ocm/test/integration/util"
1820
)
1921

22+
const expectedAnnotation = "open-cluster-management.io/automatically-accepted-on"
23+
2024
var _ = ginkgo.Describe("Cluster Auto Approval", func() {
2125
ginkgo.It("Cluster should be automatically approved", func() {
2226
var err error
@@ -49,7 +53,6 @@ var _ = ginkgo.Describe("Cluster Auto Approval", func() {
4953
if err != nil {
5054
return false
5155
}
52-
5356
return cluster.Spec.HubAcceptsClient
5457
}, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue())
5558

@@ -84,4 +87,58 @@ var _ = ginkgo.Describe("Cluster Auto Approval", func() {
8487
return nil
8588
}, eventuallyTimeout, eventuallyInterval).ShouldNot(gomega.HaveOccurred())
8689
})
90+
91+
ginkgo.It("Cluster can be denied by manually after automatically approved", func() {
92+
clusterName := "autoapprovaltest-spoke-cluster1"
93+
_, err := clusterClient.ClusterV1().ManagedClusters().Create(context.Background(), &clusterv1.ManagedCluster{
94+
ObjectMeta: v1.ObjectMeta{
95+
Name: clusterName,
96+
},
97+
}, v1.CreateOptions{})
98+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
99+
100+
gomega.Eventually(func() error {
101+
cluster, err := util.GetManagedCluster(clusterClient, clusterName)
102+
if err != nil {
103+
return err
104+
}
105+
106+
if _, ok := cluster.Annotations[expectedAnnotation]; !ok {
107+
return fmt.Errorf("cluster should have accepted annotation")
108+
}
109+
110+
if !cluster.Spec.HubAcceptsClient {
111+
return fmt.Errorf("cluster should be accepted")
112+
}
113+
114+
return nil
115+
}, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed())
116+
117+
// we can deny the cluster after it is accepted
118+
cluster, err := util.GetManagedCluster(clusterClient, clusterName)
119+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
120+
121+
updatedCluster := cluster.DeepCopy()
122+
updatedCluster.Spec.HubAcceptsClient = false
123+
124+
_, err = clusterClient.ClusterV1().ManagedClusters().Update(context.TODO(), updatedCluster, v1.UpdateOptions{})
125+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
126+
127+
gomega.Consistently(func() error {
128+
cluster, err := util.GetManagedCluster(clusterClient, clusterName)
129+
if err != nil {
130+
return err
131+
}
132+
133+
if _, ok := cluster.Annotations[expectedAnnotation]; !ok {
134+
return fmt.Errorf("cluster should have accepted annotation")
135+
}
136+
137+
if cluster.Spec.HubAcceptsClient {
138+
return fmt.Errorf("cluster should be denied")
139+
}
140+
141+
return nil
142+
}, 10*time.Second, eventuallyInterval).Should(gomega.Succeed())
143+
})
87144
})

0 commit comments

Comments
 (0)