Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/refactor proto #516

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,13 @@ COPY config/settings-reducer.yaml.template /app/config/settings-reducer.yaml
COPY $REQUIREMENTS /app/config/requirements.txt

# Install developer tools (needed for psutil)
RUN apt-get update && apt-get install -y python3-dev gcc
RUN apt-get update && apt-get install -y python3-dev gcc wget

# Install grpc health probe checker
RUN GRPC_HEALTH_PROBE_VERSION=v0.4.24 && \
wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-amd64 && \
chmod +x /bin/grpc_health_probe


# Create FEDn app directory
SHELL ["/bin/bash", "-c"]
Expand Down
14 changes: 12 additions & 2 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,16 @@ services:
- "/venv/bin/pip install --no-cache-dir -e /app/fedn && /venv/bin/fedn run combiner --init config/settings-combiner.yaml"
ports:
- 12080:12080
healthcheck:
test:
[
"CMD",
"/bin/grpc_health_probe",
"-addr=localhost:12080"
]
interval: 2s
timeout: 10s
retries: 5
depends_on:
- api-server

Expand All @@ -119,5 +129,5 @@ services:
deploy:
replicas: 0
depends_on:
- api-server
- combiner
combiner:
condition: service_healthy
5 changes: 2 additions & 3 deletions fedn/fedn/network/clients/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,6 @@ def _connect(self, client_config):

self.channel = channel

self.connectorStub = rpc.ConnectorStub(channel)
self.combinerStub = rpc.CombinerStub(channel)
self.modelStub = rpc.ModelServiceStub(channel)

Expand Down Expand Up @@ -693,7 +692,7 @@ def _send_heartbeat(self, update_frequency=2.0):
heartbeat = fedn.Heartbeat(sender=fedn.Client(
name=self.name, role=fedn.WORKER))
try:
self.connectorStub.SendHeartbeat(heartbeat, metadata=self.metadata)
self.combinerStub.SendHeartbeat(heartbeat, metadata=self.metadata)
self._missed_heartbeat = 0
except grpc.RpcError as e:
status_code = e.code()
Expand Down Expand Up @@ -733,7 +732,7 @@ def _send_status(self, msg, log_level=fedn.Status.INFO, type=None, request=None)
self.logs.append(
"{} {} LOG LEVEL {} MESSAGE {}".format(str(datetime.now()), status.sender.name, status.log_level,
status.status))
_ = self.connectorStub.SendStatus(status, metadata=self.metadata)
_ = self.combinerStub.SendStatus(status, metadata=self.metadata)

def run(self):
""" Run the client. """
Expand Down
8 changes: 4 additions & 4 deletions fedn/fedn/network/combiner/combiner.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def role_to_proto_role(role):
return fedn.OTHER


class Combiner(rpc.CombinerServicer, rpc.ReducerServicer, rpc.ConnectorServicer, rpc.ControlServicer):
class Combiner(rpc.CombinerServicer):
""" Combiner gRPC server.

:param config: configuration for the combiner
Expand Down Expand Up @@ -398,9 +398,9 @@ def _flush_model_update_queue(self):

#####################################################################################################################

# Control Service
# Controller Service

def Start(self, control: fedn.ControlRequest, context):
def StartRound(self, control: fedn.ControlRequest, context):
""" Start a round of federated learning"

:param control: the control request
Expand Down Expand Up @@ -475,7 +475,7 @@ def FlushAggregationQueue(self, control: fedn.ControlRequest, context):

##############################################################################

def Stop(self, control: fedn.ControlRequest, context):
def StopRound(self, control: fedn.ControlRequest, context):
""" TODO: Not yet implemented.

:param control: the control request
Expand Down
12 changes: 6 additions & 6 deletions fedn/fedn/network/combiner/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def flush_model_update_queue(self):

channel = Channel(self.address, self.port,
self.certificate).get_channel()
control = rpc.ControlStub(channel)
control = rpc.CombinerStub(channel)

request = fedn.ControlRequest()

Expand All @@ -196,7 +196,7 @@ def set_aggregator(self, aggregator):

channel = Channel(self.address, self.port,
self.certificate).get_channel()
control = rpc.ControlStub(channel)
control = rpc.CombinerStub(channel)

request = fedn.ControlRequest()
p = request.parameter.add()
Expand All @@ -221,7 +221,7 @@ def submit(self, config):
"""
channel = Channel(self.address, self.port,
self.certificate).get_channel()
control = rpc.ControlStub(channel)
control = rpc.CombinerStub(channel)
request = fedn.ControlRequest()
request.command = fedn.Command.START
for k, v in config.items():
Expand All @@ -230,7 +230,7 @@ def submit(self, config):
p.value = str(v)

try:
response = control.Start(request)
response = control.StartRound(request)
except grpc.RpcError as e:
if e.code() == grpc.StatusCode.UNAVAILABLE:
raise CombinerUnavailableError
Expand Down Expand Up @@ -272,7 +272,7 @@ def allowing_clients(self):
"""
channel = Channel(self.address, self.port,
self.certificate).get_channel()
connector = rpc.ConnectorStub(channel)
connector = rpc.CombinerStub(channel)
request = fedn.ConnectionRequest()

try:
Expand Down Expand Up @@ -302,7 +302,7 @@ def list_active_clients(self, queue=1):
"""
channel = Channel(self.address, self.port,
self.certificate).get_channel()
control = rpc.ConnectorStub(channel)
control = rpc.CombinerStub(channel)
request = fedn.ListClientsRequest()
request.channel = queue
try:
Expand Down
47 changes: 21 additions & 26 deletions fedn/fedn/network/grpc/fedn.proto
Original file line number Diff line number Diff line change
Expand Up @@ -195,17 +195,6 @@ message ControlResponse {
repeated Parameter parameter = 2;
}

service Control {
rpc Start(ControlRequest) returns (ControlResponse);
rpc Stop(ControlRequest) returns (ControlResponse);
rpc FlushAggregationQueue(ControlRequest) returns (ControlResponse);
rpc SetAggregator(ControlRequest) returns (ControlResponse);
}

service Reducer {
rpc GetGlobalModel (GetGlobalModelRequest) returns (GetGlobalModelResponse);
}

message ConnectionRequest {

}
Expand All @@ -220,13 +209,31 @@ message ConnectionResponse {
ConnectionStatus status = 1;
}

service Connector {
// Stream endpoint for status updates

service Combiner {

// Stream endpoints for training/validation pub/sub
rpc ModelUpdateRequestStream (ClientAvailableMessage) returns (stream ModelUpdateRequest);
rpc ModelUpdateStream (ClientAvailableMessage) returns (stream ModelUpdate);
rpc ModelValidationRequestStream (ClientAvailableMessage) returns (stream ModelValidationRequest);
rpc ModelValidationStream (ClientAvailableMessage) returns (stream ModelValidation);

rpc SendModelUpdate (ModelUpdate) returns (Response);
rpc SendModelValidation (ModelValidation) returns (Response);

rpc StartRound(ControlRequest) returns (ControlResponse);
rpc StopRound(ControlRequest) returns (ControlResponse);
rpc FlushAggregationQueue(ControlRequest) returns (ControlResponse);
rpc SetAggregator(ControlRequest) returns (ControlResponse);

rpc GetGlobalModel (GetGlobalModelRequest) returns (GetGlobalModelResponse);

// Stream endpoint for status updates
rpc AllianceStatusStream (ClientAvailableMessage) returns (stream Status);

// Report endpoint
rpc SendStatus (Status) returns (Response);
// rpc RegisterClient (ClientAvailableMessage) returns (Response);

// List active clients endpoint
rpc ListActiveClients (ListClientsRequest) returns (ClientList);

Expand All @@ -237,18 +244,6 @@ service Connector {

rpc ReassignClient (ReassignRequest) returns (Response);
rpc ReconnectClient (ReconnectRequest) returns (Response);
}

service Combiner {

// Stream endpoints for training/validation pub/sub
rpc ModelUpdateRequestStream (ClientAvailableMessage) returns (stream ModelUpdateRequest);
rpc ModelUpdateStream (ClientAvailableMessage) returns (stream ModelUpdate);
rpc ModelValidationRequestStream (ClientAvailableMessage) returns (stream ModelValidationRequest);
rpc ModelValidationStream (ClientAvailableMessage) returns (stream ModelValidation);

rpc SendModelUpdate (ModelUpdate) returns (Response);
rpc SendModelValidation (ModelValidation) returns (Response);

}

Loading
Loading