From 0b83d6b42554697e523ce1c67b228ce88b02c8df Mon Sep 17 00:00:00 2001 From: stefanhellander <59477428+stefanhellander@users.noreply.github.com> Date: Wed, 15 May 2024 08:58:28 +0200 Subject: [PATCH 1/3] Change timeout of deamon_thread_client_status so that it doesn't match period of client online status cutoff of 10s (#605) --- fedn/network/combiner/combiner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fedn/network/combiner/combiner.py b/fedn/network/combiner/combiner.py index 450b8b689..f9491b90e 100644 --- a/fedn/network/combiner/combiner.py +++ b/fedn/network/combiner/combiner.py @@ -337,7 +337,7 @@ def _list_active_clients(self, channel): return clients["active_clients"] - def _deamon_thread_client_status(self, timeout=10): + def _deamon_thread_client_status(self, timeout=5): """Deamon thread that checks for inactive clients and updates statestore.""" while True: time.sleep(timeout) From 368850b065f05fbbf9d57c049e9871f3ea660ecd Mon Sep 17 00:00:00 2001 From: stefanhellander <59477428+stefanhellander@users.noreply.github.com> Date: Thu, 16 May 2024 15:20:39 +0200 Subject: [PATCH 2/3] bugfix/SK-850 | Sets client status to online in database when client connects (#606) * Sets client status to online in database when client connects. Also cleans up status of previously connected clients on startup. 
--- fedn/network/combiner/combiner.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/fedn/network/combiner/combiner.py b/fedn/network/combiner/combiner.py index f9491b90e..f66e2af8f 100644 --- a/fedn/network/combiner/combiner.py +++ b/fedn/network/combiner/combiner.py @@ -65,7 +65,6 @@ def __init__(self, config): # Client queues self.clients = {} - self.modelservice = ModelService() # Validate combiner name match = re.search(VALID_NAME_REGEX, config["name"]) @@ -122,6 +121,17 @@ def __init__(self, config): self.repository = Repository(announce_config["storage"]["storage_config"]) self.statestore = MongoStateStore(announce_config["statestore"]["network_id"], announce_config["statestore"]["mongo_config"]) + + # Fetch all clients previously connected to the combiner + # If a client and a combiner goes down at the same time, + # the client will be stuck listed as "online" in the statestore. + # Set the status to offline for previous clients. + previous_clients = self.statestore.clients.find({"combiner": config["name"]}) + for client in previous_clients: + self.statestore.set_client({"name": client["name"], "status": "offline"}) + + self.modelservice = ModelService() + # Create gRPC server self.server = Server(self, self.modelservice, grpc_config) @@ -600,6 +610,10 @@ def TaskStream(self, response, context): self._send_status(status) + # Set client status to online + self.clients[client.name]["status"] = "online" + self.statestore.set_client({"name": client.name, "status": "online"}) + # Keep track of the time context has been active start_time = time.time() while context.is_active(): From 186a1bf16c7771c765021942bdcd54bf26395e78 Mon Sep 17 00:00:00 2001 From: stefanhellander <59477428+stefanhellander@users.noreply.github.com> Date: Thu, 16 May 2024 16:50:55 +0200 Subject: [PATCH 3/3] Bugfix/SK-846 (#604) * Set to offline if previous status was online or available --- fedn/network/combiner/combiner.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/fedn/network/combiner/combiner.py b/fedn/network/combiner/combiner.py index f66e2af8f..8eacd917e 100644 --- a/fedn/network/combiner/combiner.py +++ b/fedn/network/combiner/combiner.py @@ -336,7 +336,7 @@ def _list_active_clients(self, channel): if status != "online": self.clients[client]["status"] = "online" clients["update_active_clients"].append(client) - elif status == "online": + elif status != "offline": self.clients[client]["status"] = "offline" clients["update_offline_clients"].append(client) # Update statestore with client status