Skip to content

Commit 84587a8

Browse files
author
Pushkar Acharya
committed
Fix the instance-exists check when the instance is not found
Improves the handling of cases where instances are not present on Crusoe Cloud but are still present in the k8s nodes list. This commit also reduces the interval that we wait before reporting an instance as missing from 5 minutes to 2 minutes.
1 parent f71a782 commit 84587a8

File tree

1 file changed

+11
-8
lines changed

1 file changed

+11
-8
lines changed

internal/instances/instances.go

+11-8
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ import (
1717
)
1818

1919
const (
20-
FIVE = 5
21-
ProviderPrefix = "crusoe://"
20+
InstanceNotFoundInterval = 2 * time.Minute
21+
ProviderPrefix = "crusoe://"
2222
)
2323

2424
var ErrAssertTimeTypeFailed = errors.New("failed to assert type time.Time for firstSeen")
@@ -125,8 +125,9 @@ func (i *Instances) InstanceShutdown(ctx context.Context, node *v1.Node) (bool,
125125
return i.InstanceShutdownByProviderID(ctx, providerID)
126126
}
127127

128+
//nolint:cyclop // must perform all checks before reporting that the instance does not exist
128129
func (i *Instances) InstanceExistsByProviderID(ctx context.Context, providerID string) (bool, error) {
129-
_, responseBody, err := i.apiClient.GetInstanceByID(ctx, getInstanceIDFromProviderID(providerID))
130+
inst, responseBody, err := i.apiClient.GetInstanceByID(ctx, getInstanceIDFromProviderID(providerID))
130131
if responseBody != nil {
131132
defer responseBody.Body.Close()
132133
}
@@ -144,17 +145,19 @@ func (i *Instances) InstanceExistsByProviderID(ctx context.Context, providerID s
144145
}
145146
firstSeenTime, ok := firstSeen.(time.Time)
146147
if !ok {
147-
return false, ErrAssertTimeTypeFailed
148+
// update the in-memory state to current time so that we can process it in next iteration
149+
i.nodeFirstSeen.Store(providerID, currTime)
150+
firstSeenTime = currTime
148151
}
149152
timeDiff := currTime.Sub(firstSeenTime)
150-
if responseBody != nil && responseBody.StatusCode == 404 {
151-
if timeDiff < FIVE*time.Minute {
153+
if inst == nil || (responseBody != nil && responseBody.StatusCode == 404) {
154+
if timeDiff < InstanceNotFoundInterval {
152155
klog.Infof("timediff: %v", timeDiff)
153-
klog.Infof("Node %v first seen less than 5 minute ago", providerID)
156+
klog.Infof("Node %v not seen for less than 2 minutes", providerID)
154157

155158
return true, nil
156159
}
157-
klog.Infof("Node %v first seen more than 5 minute ago", providerID)
160+
klog.Infof("Node %v not seen for more than 2 minutes", providerID)
158161

159162
return false, nil
160163
}

0 commit comments

Comments
 (0)