
Commit 3f18ff0

addressing review feedback
Signed-off-by: Michael Shitrit <mshitrit@redhat.com>
1 parent 3ccb93d commit 3f18ff0

4 files changed: +71 additions, −77 deletions

controllers/cluster/upgrade_checker.go

Lines changed: 29 additions & 3 deletions
```diff
@@ -6,6 +6,7 @@ import (
 	"github.com/go-logr/logr"
 	gerrors "github.com/pkg/errors"
 
+	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/manager"
@@ -14,12 +15,19 @@ import (
 	clusterversion "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
 )
 
+const (
+	// currentMachineConfigAnnotationKey is used to fetch current targetConfigVersionHash
+	currentMachineConfigAnnotationKey = "machineconfiguration.openshift.io/currentConfig"
+	// desiredMachineConfigAnnotationKey is used to indicate the version a node should be updating to
+	desiredMachineConfigAnnotationKey = "machineconfiguration.openshift.io/desiredConfig"
+)
+
 // UpgradeChecker checks if the cluster is currently under upgrade.
 // error should be thrown if it can't reliably determine if it's under upgrade or not.
 type UpgradeChecker interface {
 	// Check if the cluster is currently under upgrade.
 	// error should be thrown if it can't reliably determine if it's under upgrade or not.
-	Check() (bool, error)
+	Check(nodesToBeRemediated []corev1.Node) (bool, error)
 }
 
 type openshiftClusterUpgradeStatusChecker struct {
@@ -31,7 +39,11 @@ type openshiftClusterUpgradeStatusChecker struct {
 // force implementation of interface
 var _ UpgradeChecker = &openshiftClusterUpgradeStatusChecker{}
 
-func (o *openshiftClusterUpgradeStatusChecker) Check() (bool, error) {
+func (o *openshiftClusterUpgradeStatusChecker) Check(nodesToBeRemediated []corev1.Node) (bool, error) {
+	if o.isNodeUpgrading(nodesToBeRemediated) {
+		return true, nil
+	}
+
 	cvs, err := o.clusterVersionsClient.List(context.Background(), metav1.ListOptions{})
 	if err != nil {
 		return false, gerrors.Wrap(err, "failed to check for Openshift cluster upgrade status")
@@ -47,13 +59,27 @@ func (o *openshiftClusterUpgradeStatusChecker) Check() (bool, error) {
 	return false, nil
 }
 
+func (o *openshiftClusterUpgradeStatusChecker) isNodeUpgrading(nodesToBeRemediated []corev1.Node) bool {
+	// We can identify an HCP (Hosted Control Plane) cluster upgrade by checking annotations of the CP node.
+	for _, node := range nodesToBeRemediated {
+		if len(node.Annotations) == 0 || len(node.Annotations[desiredMachineConfigAnnotationKey]) == 0 {
+			continue
+		}
+
+		if node.Annotations[currentMachineConfigAnnotationKey] != node.Annotations[desiredMachineConfigAnnotationKey] {
+			return true
+		}
+	}
+	return false
+}
+
 type noopClusterUpgradeStatusChecker struct {
 }
 
 // force implementation of interface
 var _ UpgradeChecker = &noopClusterUpgradeStatusChecker{}
 
-func (n *noopClusterUpgradeStatusChecker) Check() (bool, error) {
+func (n *noopClusterUpgradeStatusChecker) Check([]corev1.Node) (bool, error) {
 	return false, nil
 }
```
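Net effect of this file's change: `Check` now short-circuits on a per-node machine-config comparison before querying the ClusterVersion API, so an HCP (Hosted Control Plane) upgrade is detected even where ClusterVersion is not a reliable signal. Below is a minimal, self-contained sketch of that comparison; the node name, config hashes, and standalone `main` are fabricated for illustration:

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const (
	currentConfig = "machineconfiguration.openshift.io/currentConfig"
	desiredConfig = "machineconfiguration.openshift.io/desiredConfig"
)

// isNodeUpgrading mirrors the helper added in this commit: a node counts as
// mid-upgrade while its current machine-config hash lags the desired one.
func isNodeUpgrading(nodes []corev1.Node) bool {
	for _, node := range nodes {
		if len(node.Annotations) == 0 || len(node.Annotations[desiredConfig]) == 0 {
			continue
		}
		if node.Annotations[currentConfig] != node.Annotations[desiredConfig] {
			return true
		}
	}
	return false
}

func main() {
	// Fabricated node: desired hash differs from current, i.e. an upgrade is in flight.
	node := corev1.Node{ObjectMeta: metav1.ObjectMeta{
		Name: "cp-0",
		Annotations: map[string]string{
			currentConfig: "hash-old",
			desiredConfig: "hash-new",
		},
	}}
	fmt.Println(isNodeUpgrading([]corev1.Node{node})) // prints: true
}
```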

controllers/nodehealthcheck_controller.go

Lines changed: 2 additions & 20 deletions
```diff
@@ -69,11 +69,6 @@ const (
 
 	// RemediationControlPlaneLabelKey is the label key to put on remediation CRs for control plane nodes
 	RemediationControlPlaneLabelKey = "remediation.medik8s.io/isControlPlaneNode"
-
-	// currentMachineConfigAnnotationKey is used to fetch current targetConfigVersionHash
-	currentMachineConfigAnnotationKey = "machineconfiguration.openshift.io/currentConfig"
-	// desiredMachineConfigAnnotationKey is used to indicate the version a node should be updating to
-	desiredMachineConfigAnnotationKey = "machineconfiguration.openshift.io/desiredConfig"
 )
 
 var (
@@ -394,26 +389,13 @@ func (r *NodeHealthCheckReconciler) Reconcile(ctx context.Context, req ctrl.Requ
 }
 
 func (r *NodeHealthCheckReconciler) isClusterUpgrading(nodesToBeRemediated []v1.Node) bool {
-	clusterUpgrading, err := r.ClusterUpgradeStatusChecker.Check()
+	clusterUpgrading, err := r.ClusterUpgradeStatusChecker.Check(nodesToBeRemediated)
 	if err != nil {
 		// if we can't reliably tell if the cluster is upgrading then just continue with remediation.
 		// TODO finer error handling may help to decide otherwise here.
 		r.Log.Error(err, "failed to check if the cluster is upgrading. Proceed with remediation as if it is not upgrading")
 	}
-	return clusterUpgrading || r.isHostedControlPlaneUpgrading(nodesToBeRemediated)
-}
-
-func (r *NodeHealthCheckReconciler) isHostedControlPlaneUpgrading(nodesToBeRemediated []v1.Node) bool {
-	for _, node := range nodesToBeRemediated {
-		if !nodes.IsControlPlane(&node) || len(node.Annotations) == 0 || len(node.Annotations[desiredMachineConfigAnnotationKey]) == 0 {
-			continue
-		}
-
-		if node.Annotations[currentMachineConfigAnnotationKey] != node.Annotations[desiredMachineConfigAnnotationKey] {
-			return true
-		}
-	}
-	return false
+	return clusterUpgrading
 }
 
 func (r *NodeHealthCheckReconciler) checkNodeConditions(nodes []v1.Node, nhc *remediationv1alpha1.NodeHealthCheck) (notMatchingNodes, soonMatchingNodes, matchingNodes []v1.Node, requeueAfter *time.Duration) {
```
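With the HCP check folded into the checker, `isClusterUpgrading` reduces to a single delegation and keeps its fail-open stance: a checker error is logged and remediation proceeds as if no upgrade is running. A hedged, compilable sketch of that shape, with a local `Checker` interface standing in for `cluster.UpgradeChecker`:

```go
package sketch

import (
	"github.com/go-logr/logr"
	corev1 "k8s.io/api/core/v1"
)

// Checker is a stand-in for cluster.UpgradeChecker after this commit.
type Checker interface {
	Check(nodesToBeRemediated []corev1.Node) (bool, error)
}

// isClusterUpgrading fails open: if the upgrade state cannot be determined,
// the error is logged and the checker's (false) answer is returned, so
// remediation is never blocked indefinitely by a flaky status source.
func isClusterUpgrading(c Checker, log logr.Logger, nodes []corev1.Node) bool {
	upgrading, err := c.Check(nodes)
	if err != nil {
		log.Error(err, "failed to check if the cluster is upgrading; proceeding as if it is not")
	}
	return upgrading
}
```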

controllers/nodehealthcheck_controller_test.go

Lines changed: 28 additions & 48 deletions
```diff
@@ -1687,11 +1687,13 @@ var _ = Describe("Node Health Check CR", func() {
 	})
 
 	When("Nodes are candidates for remediation and cluster is upgrading", func() {
+		BeforeEach(func() {
+			clusterUpgradeRequeueAfter = 5 * time.Second
+			setupObjects(1, 2, true)
+		})
 		When("non HCP Upgrade", func() {
 			BeforeEach(func() {
-				clusterUpgradeRequeueAfter = 5 * time.Second
 				upgradeChecker.Upgrading = true
-				setupObjects(1, 2, true)
 			})
 
 			AfterEach(func() {
@@ -1722,49 +1724,31 @@
 
 		})
 		When("HCP Upgrade", func() {
-			var unhealthyCPNodeName string
+			var currentMachineConfigAnnotationKey = "machineconfiguration.openshift.io/currentConfig"
+			var desiredMachineConfigAnnotationKey = "machineconfiguration.openshift.io/desiredConfig"
 			BeforeEach(func() {
-				clusterUpgradeRequeueAfter = 5 * time.Second
-				objects = newNodes(1, 0, true, true)
-				cpNode := objects[0]
-				objects = append(objects, newNodes(0, 2, false, false)...)
-				objects = append(objects, underTest)
-				unhealthyCPNodeName = cpNode.GetName()
-				cpNodeAnnotations := map[string]string{}
-				if cpNode.GetAnnotations() != nil {
-					cpNodeAnnotations = cpNode.GetAnnotations()
-				}
-				cpNodeAnnotations[currentMachineConfigAnnotationKey] = "fakeVersion1"
-				cpNodeAnnotations[desiredMachineConfigAnnotationKey] = "fakeVersion2"
-				cpNode.SetAnnotations(cpNodeAnnotations)
-
-				//Mock PDB in order to allow creation of CP remediation
-				etcdNs := &v1.Namespace{
-					ObjectMeta: metav1.ObjectMeta{
-						Name: "openshift-etcd",
-					},
-					Spec: v1.NamespaceSpec{},
-				}
-				err := k8sClient.Create(context.Background(), etcdNs)
-				if err != nil {
-					Expect(errors.IsAlreadyExists(err)).To(BeTrue())
-				}
-
-				pdb := &policyv1.PodDisruptionBudget{}
-				pdb.Namespace = etcdNs.Name
-				pdb.Name = "mockPDB"
-				Expect(k8sClient.Create(context.TODO(), pdb)).To(Succeed())
-				pdb.Status.DisruptionsAllowed = 1
-				Expect(k8sClient.Status().Update(context.TODO(), pdb)).To(Succeed())
+				//Use a real upgrade checker instead of mock
+				prevUpgradeChecker := nhcReconciler.ClusterUpgradeStatusChecker
+				nhcReconciler.ClusterUpgradeStatusChecker = ocpUpgradeChecker
 				DeferCleanup(func() {
-					Expect(k8sClient.Delete(context.TODO(), pdb)).To(Succeed())
+					nhcReconciler.ClusterUpgradeStatusChecker = prevUpgradeChecker
 				})
 
+				//Simulate HCP Upgrade on the unhealthy node
+				upgradingNode := objects[0]
+				upgradingNodeAnnotations := map[string]string{}
+				if upgradingNode.GetAnnotations() != nil {
+					upgradingNodeAnnotations = upgradingNode.GetAnnotations()
+				}
+				upgradingNodeAnnotations[currentMachineConfigAnnotationKey] = "fakeVersion1"
+				upgradingNodeAnnotations[desiredMachineConfigAnnotationKey] = "fakeVersion2"
+				upgradingNode.SetAnnotations(upgradingNodeAnnotations)
+
 			})
 
 			It("doesn't not remediate but requeues reconciliation and updates status", func() {
 
-				cr := findRemediationCRForNHC(unhealthyCPNodeName, underTest)
+				cr := findRemediationCRForNHC(unhealthyNodeName, underTest)
 				Expect(cr).To(BeNil())
 
 				Expect(*underTest.Status.HealthyNodes).To(Equal(0))
@@ -1774,20 +1758,16 @@
 				Expect(underTest.Status.Reason).ToNot(BeEmpty())
 
 				By("stopping upgrade and waiting for requeue")
-				nodeList := v1.NodeList{}
-				Expect(k8sClient.List(context.TODO(), &nodeList)).To(Succeed())
-				for _, node := range nodeList.Items {
-					if len(node.GetAnnotations()) == 0 {
-						continue
-					}
-					if node.GetAnnotations()[currentMachineConfigAnnotationKey] != node.GetAnnotations()[desiredMachineConfigAnnotationKey] {
-						node.Annotations[currentMachineConfigAnnotationKey] = node.GetAnnotations()[desiredMachineConfigAnnotationKey]
-						Expect(k8sClient.Update(context.TODO(), &node)).To(Succeed())
-					}
+				unhealthyNode := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: unhealthyNodeName}}
+				Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(unhealthyNode), unhealthyNode)).To(Succeed())
+				if unhealthyNode.GetAnnotations()[currentMachineConfigAnnotationKey] != unhealthyNode.GetAnnotations()[desiredMachineConfigAnnotationKey] {
+					// Simulating upgrade complete.
+					unhealthyNode.Annotations[currentMachineConfigAnnotationKey] = unhealthyNode.GetAnnotations()[desiredMachineConfigAnnotationKey]
+					Expect(k8sClient.Update(context.TODO(), unhealthyNode)).To(Succeed())
 				}
 
 				time.Sleep(10 * time.Second)
-				cr = findRemediationCRForNHC(unhealthyCPNodeName, underTest)
+				cr = findRemediationCRForNHC(unhealthyNodeName, underTest)
 				Expect(cr).ToNot(BeNil())
 
 				Expect(k8sClient.Get(context.Background(), client.ObjectKeyFromObject(underTest), underTest)).To(Succeed())
```

controllers/suite_test.go

Lines changed: 12 additions & 6 deletions
```diff
@@ -120,9 +120,13 @@
 	ctx    context.Context
 	cancel context.CancelFunc
 
-	upgradeChecker *fakeClusterUpgradeChecker
-	fakeTime       *time.Time
-	fakeRecorder   *record.FakeRecorder
+	upgradeChecker    *fakeClusterUpgradeChecker
+	ocpUpgradeChecker cluster.UpgradeChecker
+
+	fakeTime     *time.Time
+	fakeRecorder *record.FakeRecorder
+
+	nhcReconciler *NodeHealthCheckReconciler
 )
 
 func TestAPIs(t *testing.T) {
@@ -238,15 +242,17 @@ var _ = BeforeSuite(func() {
 
 	mhcEvents := make(chan event.GenericEvent)
 	fakeRecorder = record.NewFakeRecorder(1000)
-	err = (&NodeHealthCheckReconciler{
+	ocpUpgradeChecker, _ = cluster.NewClusterUpgradeStatusChecker(k8sManager, cluster.Capabilities{IsOnOpenshift: true})
+	nhcReconciler = &NodeHealthCheckReconciler{
 		Client:                      k8sManager.GetClient(),
 		Log:                         k8sManager.GetLogger().WithName("test reconciler"),
 		Recorder:                    fakeRecorder,
 		ClusterUpgradeStatusChecker: upgradeChecker,
 		MHCChecker:                  mhcChecker,
 		MHCEvents:                   mhcEvents,
 		Capabilities:                caps,
-	}).SetupWithManager(k8sManager)
+	}
+	err = nhcReconciler.SetupWithManager(k8sManager)
 	Expect(err).NotTo(HaveOccurred())
 
 	err = (&MachineHealthCheckReconciler{
@@ -286,7 +292,7 @@ type fakeClusterUpgradeChecker struct {
 // force implementation of interface
 var _ cluster.UpgradeChecker = &fakeClusterUpgradeChecker{}
 
-func (c *fakeClusterUpgradeChecker) Check() (bool, error) {
+func (c *fakeClusterUpgradeChecker) Check([]v1.Node) (bool, error) {
 	return c.Upgrading, c.Err
 }
```