Skip to content

Commit 570d0bf

Browse files
cying17 and baochunli authored
Fixed an issue related to client samplers. (#177)
Co-authored-by: Baochun Li <bli@ece.toronto.edu>
1 parent 25b0104 commit 570d0bf

File tree

10 files changed

+25
-26
lines changed

10 files changed

+25
-26
lines changed

docs/Configuration.md

+3-2
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ Attributes in **bold** must be included in a configuration file, while attribute
2323
|||`mistnet`|A client for MistNet|
2424
|**total_clients**|The total number of clients|A positive number||
2525
|**per_round**|The number of clients selected in each round| Any positive integer that is not larger than **total_clients**||
26-
|**do_test**|Whether the clients compute test accuracy locally| `true` or `false`|if `true` and the configuration file has `results` section, a CSV file will log test accuracy of every selected client in each round|
26+
|do_test|Whether the clients compute test accuracy locally| `true` or `false`|if `true` and the configuration file has `results` section, a CSV file will log test accuracy of every selected client in each round|
2727
|speed_simulation|Whether we simulate client heterogeneity in training speed|
2828
|simulation_distribution|Parameters for simulating client heterogeneity in training speed|`distribution`|`normal` for normal or `zipf` for Zipf|
2929
|||`s`|the parameter `s` in Zipf distribution|
@@ -76,6 +76,7 @@ Attributes in **bold** must be included in a configuration file, while attribute
7676
|disable_clients|If this optional setting is enabled as `true`, the server will not launch client processes on the same machine.||
7777
|s3_endpoint_url|The endpoint URL for an S3-compatible storage service, used for transferring payloads between clients and servers.||
7878
|s3_bucket|The bucket name for an S3-compatible storage service, used for transferring payloads between clients and servers.||
79+
|random_seed|Use a fixed random seed for selecting clients (and sampling testset if needed) so that experiments are reproducible||
7980
|ping_interval|The time interval in seconds at which the server pings the client. ||default: 3600|
8081
|ping_timeout| The time in seconds that the client waits for the server to respond before disconnecting.|| default: 3600|
8182
|synchronous|Synchronous or asynchronous mode|`true` or `false`||
@@ -120,7 +121,7 @@ Attributes in **bold** must be included in a configuration file, while attribute
120121
|||`mixed`|Some data are iid, while others are non-iid. Must have *non_iid_clients* attributes|
121122
|test_set_sampler|How to sample the test set when clients test locally|Could be any **sampler**|Without this parameter, every client's test set is the test set of the datasource|
122123
|edge_test_set_sampler|How to sample the test set when edge servers test locally|Could be any **sampler**|Without this parameter, edge servers' test sets are the test set of the datasource if they locally test their aggregated models in cross-silo FL|
123-
|random_seed|Use a fixed random seed so that experiments are reproducible (clients always have the same datasets)||
124+
|random_seed|Use a fixed random seed to sample each client's dataset so that experiments are reproducible||
124125
|**partition_size**|Number of samples in each client's dataset|Any positive integer||
125126
|concentration| The concentration parameter of symmetric Dirichlet distribution, used by `noniid` **sampler** || default: 1|
126127
|*non_iid_clients*|Indexs of clients whose datasets are non-iid. Other clients' datasets are iid|e.g., 4|Must have this attribute if the **sampler** is `mixed`|

examples/adaptive_hgb/adaptive_hgb_client.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def load_data(self) -> None:
9595

9696
self.valset = self.datasource.get_val_set()
9797

98-
if Config().clients.do_test:
98+
if hasattr(Config().clients, 'do_test') and Config().clients.do_test:
9999
# Set the testset if local testing is needed
100100
self.testset = self.datasource.get_test_set()
101101

@@ -225,15 +225,15 @@ async def train(self):
225225
delta_o, delta_g = self.obtain_delta_og()
226226

227227
# Generate a report for the server, performing model testing if applicable
228-
if Config().clients.do_test:
228+
if hasattr(Config().clients, 'do_test') and Config().clients.do_test:
229229
accuracy = self.trainer.test(self.testset)
230230

231231
if accuracy == 0:
232232
# The testing process failed, disconnect from the server
233233
await self.sio.disconnect()
234234

235-
logging.info("[Client #{:d}] Test accuracy: {:.2f}%".format(
236-
self.client_id, 100 * accuracy))
235+
logging.info('[Client #%d] Test accuracy: %.2f%%.', self.client_id,
236+
100 * accuracy)
237237
else:
238238
accuracy = 0
239239

examples/fedasync/fedasync_server.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ async def process_reports(self):
7676
self.algorithm.load_weights(updated_weights)
7777

7878
# Testing the global model accuracy
79-
if Config().clients.do_test:
79+
if hasattr(Config().server, 'do_test') and not Config().server.do_test:
8080
# Compute the average accuracy from client reports
8181
self.accuracy = self.accuracy_averaging(self.updates)
8282
logging.info('[%s] Average client accuracy: %.2f%%.', self,

examples/fedunlearning_baseline/fedunlearning_client.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ def process_server_response(self, server_response):
4646
self,
4747
Config().clients.deleted_data_ratio * 100)
4848

49-
if (hasattr(Config().data, 'reload_data')
50-
and Config().data.reload_data) or not self.data_loaded:
49+
if not hasattr(Config().data,
50+
'reload_data') or Config().data.reload_data:
5151
logging.info("[%s] Loading the dataset.", self)
5252
self.load_data()
5353

plato/clients/base.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@ def __init__(self) -> None:
8585
self.sio = None
8686
self.chunks = []
8787
self.server_payload = None
88-
self.data_loaded = False # is training data already loaded from the disk?
8988
self.s3_client = None
9089
self.outbound_processor = None
9190
self.inbound_processor = None
@@ -156,8 +155,8 @@ async def payload_to_arrive(self, response) -> None:
156155

157156
logging.info("[Client #%d] Selected by the server.", self.client_id)
158157

159-
if (hasattr(Config().data, 'reload_data')
160-
and Config().data.reload_data) or not self.data_loaded:
158+
if not hasattr(Config().data,
159+
'reload_data') or Config().data.reload_data:
161160
self.load_data()
162161

163162
if self.comm_simulation:

plato/clients/simple.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,6 @@ def load_data(self) -> None:
8686
self.datasource = datasources_registry.get(
8787
client_id=self.client_id)
8888

89-
self.data_loaded = True
90-
9189
logging.info("[%s] Dataset size: %s", self,
9290
self.datasource.num_train_examples())
9391

@@ -102,7 +100,7 @@ def load_data(self) -> None:
102100
# PyTorch uses samplers when loading data with a data loader
103101
self.trainset = self.datasource.get_train_set()
104102

105-
if Config().clients.do_test:
103+
if hasattr(Config().clients, 'do_test') and Config().clients.do_test:
106104
# Set the testset if local testing is needed
107105
self.testset = self.datasource.get_test_set()
108106
if hasattr(Config().data, 'testset_sampler'):
@@ -130,9 +128,9 @@ async def train(self):
130128
weights = self.algorithm.extract_weights()
131129

132130
# Generate a report for the server, performing model testing if applicable
133-
if Config().clients.do_test and (
134-
not hasattr(Config().clients, 'test_interval')
135-
or self.current_round % Config().clients.test_interval == 0):
131+
if (hasattr(Config().clients, 'do_test') and Config().clients.do_test
132+
) and (not hasattr(Config().clients, 'test_interval') or
133+
self.current_round % Config().clients.test_interval == 0):
136134
accuracy = self.trainer.test(self.testset, self.testset_sampler)
137135

138136
if accuracy == -1:

plato/samplers/dirichlet.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
class Sampler(base.Sampler):
1313
"""Create a data sampler for each client to use a divided partition of the
1414
dataset, biased across labels according to the Dirichlet distribution."""
15+
1516
def __init__(self, datasource, client_id, testing):
1617
super().__init__()
1718

@@ -26,12 +27,11 @@ def __init__(self, datasource, client_id, testing):
2627

2728
if dist.distribution.lower() == "uniform":
2829
self.partition_size *= np.random.uniform(dist.low, dist.high)
29-
30+
3031
if dist.distribution.lower() == "normal":
3132
self.partition_size *= np.random.normal(dist.mean, dist.high)
3233

33-
self.partition_size = int(self.partition_size)
34-
34+
self.partition_size = int(self.partition_size)
3535

3636
# Concentration parameter to be used in the Dirichlet distribution
3737
concentration = Config().data.concentration if hasattr(

plato/samplers/iid.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ class Sampler(base.Sampler):
1515

1616
def __init__(self, datasource, client_id, testing):
1717
super().__init__()
18+
1819
if testing:
1920
dataset = datasource.get_test_set()
2021
else:
@@ -45,8 +46,8 @@ def get(self):
4546
"""Obtains an instance of the sampler. """
4647
gen = torch.Generator()
4748
gen.manual_seed(self.random_seed)
48-
version = torch.__version__
49-
if int(version[0]) <= 1 and int(version[2]) <= 5:
49+
version = torch.__version__.split(".")
50+
if int(version[0]) <= 1 and int(version[1]) <= 5:
5051
return SubsetRandomSampler(self.subset_indices)
5152
return SubsetRandomSampler(self.subset_indices, generator=gen)
5253

plato/servers/fedavg.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def configure(self):
108108
Config().params['result_path'])
109109

110110
# Initialize the test accuracy csv file if clients compute locally
111-
if Config().clients.do_test:
111+
if hasattr(Config().clients, 'do_test') and Config().clients.do_test:
112112
accuracy_csv_file = f"{Config().params['result_path']}/{os.getpid()}_accuracy.csv"
113113
accuracy_headers = ["round", "client_id", "accuracy"]
114114
csv_processor.initialize_csv(accuracy_csv_file, accuracy_headers,
@@ -223,7 +223,7 @@ async def wrap_up_processing_reports(self):
223223
result_csv_file = f"{Config().params['result_path']}/{os.getpid()}.csv"
224224
csv_processor.write_csv(result_csv_file, new_row)
225225

226-
if Config().clients.do_test:
226+
if hasattr(Config().clients, 'do_test') and Config().clients.do_test:
227227
# Updates the log for client test accuracies
228228
accuracy_csv_file = f"{Config().params['result_path']}/{os.getpid()}_accuracy.csv"
229229

plato/servers/mistnet.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ async def process_reports(self):
4949
Config().algorithm.cut_layer)
5050

5151
# Test the updated model
52-
if not Config().clients.do_test:
52+
if not hasattr(Config().server, 'do_test') or Config().server.do_test:
5353
self.accuracy = self.trainer.test(self.testset)
5454
logging.info('[%s] Global model accuracy: %.2f%%\n', self,
5555
100 * self.accuracy)

0 commit comments

Comments (0)