From e9643c61392e04ef5b8a67a613a1334659c58e98 Mon Sep 17 00:00:00 2001 From: rawdaGastan Date: Wed, 7 Aug 2024 17:11:29 +0300 Subject: [PATCH] set an error result instead of deleting workloads after a crash --- client/node.go | 2 +- pkg/container/watch.go | 4 ++-- pkg/provision/engine.go | 17 ++++++----------- pkg/vm/monitor.go | 5 ++--- 4 files changed, 11 insertions(+), 17 deletions(-) diff --git a/client/node.go b/client/node.go index 073d0154b..72b6dea33 100644 --- a/client/node.go +++ b/client/node.go @@ -154,7 +154,7 @@ func (n *NodeClient) DeploymentList(ctx context.Context) (dls []gridtypes.Deploy return } -// DeploymentGet gets a deployment via contract ID +// DeploymentChanges gets the changes of a deployment via contract ID func (n *NodeClient) DeploymentChanges(ctx context.Context, contractID uint64) (changes []gridtypes.Workload, err error) { const cmd = "zos.deployment.changes" in := args{ diff --git a/pkg/container/watch.go b/pkg/container/watch.go index 157ca8829..ed6906cc6 100644 --- a/pkg/container/watch.go +++ b/pkg/container/watch.go @@ -55,11 +55,11 @@ func (c *Module) handlerEventTaskExit(ctx context.Context, ns string, event *eve <-time.After(restartDelay) // wait for 2 seconds reason = c.start(ns, event.ContainerID) } else { - reason = fmt.Errorf("deleting container due to so many crashes") + reason = fmt.Errorf("container failed due to so many crashes") } if reason != nil { - log.Debug().Err(reason).Msg("deleting container due to restart error") + log.Debug().Err(reason).Msg("resulting error for container due to restart error") stub := stubs.NewProvisionStub(c.client) if err := stub.DecommissionCached(ctx, event.ContainerID, reason.Error()); err != nil { diff --git a/pkg/provision/engine.go b/pkg/provision/engine.go index 5880fa27e..0135544d5 100644 --- a/pkg/provision/engine.go +++ b/pkg/provision/engine.go @@ -989,18 +989,13 @@ func (e *NativeEngine) DecommissionCached(id string, reason string) error { return nil } - //to bad we have to repeat this here - 
ctx := context.WithValue(context.Background(), engineKey{}, e) - ctx = withDeployment(ctx, twin, dlID) - - ctx, cancel := context.WithTimeout(ctx, 3*time.Minute) - defer cancel() - - err = e.uninstallWorkload(ctx, &gridtypes.WorkloadWithID{Workload: &wl, ID: globalID}, - fmt.Sprintf("workload decommissioned by system, reason: %s", reason), - ) + result := gridtypes.Result{ + State: gridtypes.StateError, + Error: reason, + Created: gridtypes.Timestamp(time.Now().Unix()), + } - return err + return e.storage.Transaction(twin, dlID, wl.WithResults(result)) } func (n *NativeEngine) CreateOrUpdate(twin uint32, deployment gridtypes.Deployment, update bool) error { diff --git a/pkg/vm/monitor.go b/pkg/vm/monitor.go index 9a068afcd..594060bea 100644 --- a/pkg/vm/monitor.go +++ b/pkg/vm/monitor.go @@ -216,12 +216,11 @@ func (m *Module) monitorID(ctx context.Context, running map[string]Process, id s reason = m.withLogs(m.logsPath(id), err) } } else { - reason = fmt.Errorf("deleting vm due to so many crashes") + reason = fmt.Errorf("resulting error for vm due to so many crashes") } if reason != nil { - log.Debug().Err(reason).Msg("deleting vm due to restart error") - m.removeConfig(id) + log.Debug().Err(reason).Msg("resulting error for vm due to restart error") if err := stub.DecommissionCached(ctx, id, reason.Error()); err != nil { return errors.Wrapf(err, "failed to decommission reservation '%s'", id)