Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix/sk 752 #565

Merged
merged 5 commits into from
Apr 15, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 16 additions & 16 deletions fedn/fedn/network/clients/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,14 @@ def __init__(self, config):
combiner_config = self.assign()
self.connect(combiner_config)

self._initialize_dispatcher(config)
self._initialize_dispatcher(self.config)

self._initialize_helper(combiner_config)
if not self.helper:
logger.warning("Failed to retrieve helper class settings: {}".format(
combiner_config))

self._subscribe_to_combiner(config)
self._subscribe_to_combiner(self.config)

self.state = ClientState.idle

Expand Down Expand Up @@ -686,12 +686,6 @@ def process_request(self):
except grpc.RpcError as e:
logger.critical(f"GRPC process_request: An error occurred during process request: {e}")

def _handle_combiner_failure(self):
""" Register failed combiner connection."""
self._missed_heartbeat += 1
if self._missed_heartbeat > self.config['reconnect_after_missed_heartbeat']:
self.disconnect()

def _send_heartbeat(self, update_frequency=2.0):
"""Send a heartbeat to the combiner.

Expand All @@ -709,14 +703,18 @@ def _send_heartbeat(self, update_frequency=2.0):
except grpc.RpcError as e:
status_code = e.code()
if status_code == grpc.StatusCode.UNAVAILABLE:
logger.warning("GRPC hearbeat: server unavailable during send heartbeat. Retrying.")
self._missed_heartbeat += 1
logger.error("GRPC hearbeat: combiner unavailable, retrying (attempt {}/{}).".format(self._missed_heartbeat,
self.config['reconnect_after_missed_heartbeat']))
if self._missed_heartbeat > self.config['reconnect_after_missed_heartbeat']:
self.disconnect()
if status_code == grpc.StatusCode.UNAUTHENTICATED:
details = e.details()
if details == 'Token expired':
logger.warning("GRPC hearbeat: Token expired. Reconnecting.")
self.detach()
logger.error("GRPC hearbeat: Token expired. Disconnecting.")
self.disconnect()
sys.exit("Unauthorized. Token expired. Please obtain a new token.")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why? this will break the client trying to refresh token?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, was thinking about that, but currently the client is not trying to refresh the token, right? So for now the right things seems to be to disconnect?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no that's not right they will try to refresh tokens, if you ask for new assignment and the token has expired it will try to get a new one. But only when calling REST-API

logger.debug(e)
self._handle_combiner_failure()

time.sleep(update_frequency)
if not self._connected:
Expand Down Expand Up @@ -780,11 +778,13 @@ def run(self):
if self.state != old_state:
logger.info("Client in {} state.".format(ClientStateToString(self.state)))
if not self._connected:
logger.info("Detached from combiner.")
# TODO: Implement a check/condition to ulitmately close down if too many reattachment attepts have failed. s
self.attach()
logger.warning("Client lost connection to combiner. Attempting to reconnect to FEDn network.")
combiner_config = self.assign()
self.connect(combiner_config)
self._subscribe_to_combiner(self.config)
cnt = 0
if self.error_state:
return
logger.error("Client in error state. Terminiating.")
sys.exit("Client in error state. Terminiating.")
except KeyboardInterrupt:
logger.info("Shutting down.")
Loading