Skip to content

Commit 721bc2a

Browse files
author
Guillaume Lemaitre
committed
Change the printing style in logging
1 parent 12ac7d8 commit 721bc2a

15 files changed

+43
-52
lines changed

imblearn/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,8 @@ def fit(self, X, y):
102102
self.min_c_ = min(self.stats_c_, key=self.stats_c_.get)
103103
self.maj_c_ = max(self.stats_c_, key=self.stats_c_.get)
104104

105-
self.logger.info('{} classes detected: {}'.format(uniques.size,
106-
self.stats_c_))
105+
self.logger.info('%s classes detected: %s', uniques.size,
106+
self.stats_c_)
107107

108108
# Check if the ratio provided at initialisation make sense
109109
if isinstance(self.ratio, float):

imblearn/ensemble/balance_cascade.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -238,8 +238,7 @@ def _sample(self, X, y):
238238
# Find the misclassified index to keep them for the next round
239239
idx_mis_class = idx_sel_from_maj[np.nonzero(pred_label !=
240240
N_y[idx_sel_from_maj])]
241-
self.logger.debug('Elements misclassified: {}'.format(
242-
idx_mis_class))
241+
self.logger.debug('Elements misclassified: %s', idx_mis_class)
243242

244243
# Count how many random element will be selected
245244
if self.ratio == 'auto':
@@ -248,7 +247,7 @@ def _sample(self, X, y):
248247
num_samples = int(self.stats_c_[self.min_c_] / self.ratio)
249248
num_samples -= idx_mis_class.size
250249

251-
self.logger.debug('Creation of the subset #{}'.format(n_subsets))
250+
self.logger.debug('Creation of the subset #%s', n_subsets)
252251

253252
# We found a new subset, increase the counter
254253
n_subsets += 1
@@ -275,8 +274,7 @@ def _sample(self, X, y):
275274
idx_sel_from_maj),
276275
axis=0))
277276

278-
self.logger.debug('Creation of the subset #{}'.format(
279-
n_subsets))
277+
self.logger.debug('Creation of the subset #%s', n_subsets)
280278

281279
# We found a new subset, increase the counter
282280
n_subsets += 1
@@ -304,8 +302,7 @@ def _sample(self, X, y):
304302
idx_under.append(np.concatenate((idx_min,
305303
idx_sel_from_maj),
306304
axis=0))
307-
self.logger.debug('Creation of the subset #{}'.format(
308-
n_subsets))
305+
self.logger.debug('Creation of the subset #%s', n_subsets)
309306

310307
# We found a new subset, increase the counter
311308
n_subsets += 1

imblearn/ensemble/easy_ensemble.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def _sample(self, X, y):
104104
idx_under = []
105105

106106
for s in range(self.n_subsets):
107-
self.logger.debug('Creation of the set #{}'.format(s))
107+
self.logger.debug('Creation of the set #%s', s)
108108

109109
# Create the object for random under-sampling
110110
rus = RandomUnderSampler(ratio=self.ratio,

imblearn/over_sampling/adasyn.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,7 @@ def _sample(self, X, y):
120120
X_min = X[y == self.min_c_]
121121

122122
# Print if verbose is true
123-
self.logger.debug('Finding the {} nearest neighbours...'.format(
124-
self.k))
123+
self.logger.debug('Finding the %s nearest neighbours ...', self.k)
125124

126125
# Look for k-th nearest neighbours, excluding, of course, the
127126
# point itself.
@@ -151,7 +150,7 @@ def _sample(self, X, y):
151150
X_resampled = np.vstack((X_resampled, x_gen))
152151
y_resampled = np.hstack((y_resampled, self.min_c_))
153152

154-
self.logger.info('Over-sampling performed: {}'.format(Counter(
155-
y_resampled)))
153+
self.logger.info('Over-sampling performed: %s', Counter(
154+
y_resampled))
156155

157156
return X_resampled, y_resampled

imblearn/over_sampling/random_over_sampler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ def _sample(self, X, y):
114114
y[y == key],
115115
y[y == key][indx]), axis=0)
116116

117-
self.logger.info('Over-sampling performed: {}'.format(Counter(
118-
y_resampled)))
117+
self.logger.info('Over-sampling performed: %s', Counter(
118+
y_resampled))
119119

120120
return X_resampled, y_resampled

imblearn/over_sampling/smote.py

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ def _make_samples(self, X, y_type, nn_data, nn_num, n_samples,
229229
# minority label
230230
y_new = np.array([y_type] * len(X_new))
231231

232-
self.logger.info('Generated {} new samples ...'.format(len(X_new)))
232+
self.logger.info('Generated %s new samples ...', len(X_new))
233233

234234
return X_new, y_new
235235

@@ -276,8 +276,7 @@ def _sample(self, X, y):
276276
# If regular SMOTE is to be performed
277277
if self.kind == 'regular':
278278

279-
self.logger.debug('Finding the {} nearest neighbours...'.format(
280-
self.k))
279+
self.logger.debug('Finding the %s nearest neighbours ...', self.k)
281280

282281
# Look for k-th nearest neighbours, excluding, of course, the
283282
# point itself.
@@ -308,8 +307,7 @@ def _sample(self, X, y):
308307

309308
if self.kind == 'borderline1' or self.kind == 'borderline2':
310309

311-
self.logger.debug('Finding the {} nearest neighbours ...'.format(
312-
self.m))
310+
self.logger.debug('Finding the %s nearest neighbours ...', self.m)
313311

314312
# Find the NNs for all samples in the data set.
315313
self.nearest_neighbour.fit(X)
@@ -413,8 +411,7 @@ def _sample(self, X, y):
413411

414412
# First, find the nn of all the samples to identify samples
415413
# in danger and noisy ones
416-
self.logger.debug('Finding the {} nearest neighbours ...'.format(
417-
self.m))
414+
self.logger.debug('Finding the %s nearest neighbours ...', self.m)
418415

419416
# As usual, fit a nearest neighbour model to the data
420417
self.nearest_neighbour.fit(X)
@@ -428,17 +425,16 @@ def _sample(self, X, y):
428425
kind='danger')
429426
safety_bool = np.logical_not(danger_bool)
430427

431-
self.logger.debug('Out of {0} support vectors, {1} are noisy, '
432-
'{2} are in danger '
433-
'and {3} are safe.'.format(
434-
support_vector.shape[0],
435-
noise_bool.sum().astype(int),
436-
danger_bool.sum().astype(int),
437-
safety_bool.sum().astype(int)))
428+
self.logger.debug('Out of %s support vectors, %s are noisy, '
429+
'%s are in danger '
430+
'and %s are safe.',
431+
support_vector.shape[0],
432+
noise_bool.sum().astype(int),
433+
danger_bool.sum().astype(int),
434+
safety_bool.sum().astype(int))
438435

439436
# Proceed to find support vectors NNs among the minority class
440-
self.logger.debug('Finding the {} nearest neighbours ...'.format(
441-
self.k))
437+
self.logger.debug('Finding the %s nearest neighbours ...', self.k)
442438

443439
self.nearest_neighbour.set_params(**{'n_neighbors': self.k + 1})
444440
self.nearest_neighbour.fit(X_min)

imblearn/under_sampling/cluster_centroids.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ def _sample(self, X, y):
127127
num_samples)),
128128
axis=0)
129129

130-
self.logger.info('Under-sampling performed: {}'.format(Counter(
131-
y_resampled)))
130+
self.logger.info('Under-sampling performed: %s', Counter(
131+
y_resampled))
132132

133133
return X_resampled, y_resampled

imblearn/under_sampling/condensed_nearest_neighbour.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,8 @@ def _sample(self, X, y):
196196
X_resampled = np.concatenate((X_resampled, sel_x), axis=0)
197197
y_resampled = np.concatenate((y_resampled, sel_y), axis=0)
198198

199-
self.logger.info('Under-sampling performed: {}'.format(Counter(
200-
y_resampled)))
199+
self.logger.info('Under-sampling performed: %s', Counter(
200+
y_resampled))
201201

202202
# Check if the indices of the samples selected should be returned too
203203
if self.return_indices:

imblearn/under_sampling/edited_nearest_neighbours.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -173,8 +173,8 @@ def _sample(self, X, y):
173173
X_resampled = np.concatenate((X_resampled, sel_x), axis=0)
174174
y_resampled = np.concatenate((y_resampled, sel_y), axis=0)
175175

176-
self.logger.info("Under-sampling performed: {}".format(Counter(
177-
y_resampled)))
176+
self.logger.info('Under-sampling performed: %s', Counter(
177+
y_resampled))
178178

179179
# Check if the indices of the samples selected should be returned too
180180
if self.return_indices:
@@ -326,7 +326,7 @@ def _sample(self, X, y):
326326

327327
for n_iter in range(self.max_iter):
328328

329-
self.logger.debug('Apply ENN iteration #{}'.format(n_iter + 1))
329+
self.logger.debug('Apply ENN iteration #%s', n_iter + 1)
330330

331331
prev_len = y_.shape[0]
332332
if self.return_indices:
@@ -338,7 +338,7 @@ def _sample(self, X, y):
338338
if prev_len == y_.shape[0]:
339339
break
340340

341-
self.logger.info("Under-sampling performed: {}".format(Counter(y_)))
341+
self.logger.info('Under-sampling performed: %s', Counter(y_))
342342

343343
X_resampled, y_resampled = X_, y_
344344

imblearn/under_sampling/instance_hardness_threshold.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,8 +188,8 @@ def _sample(self, X, y):
188188
X_resampled = X[mask]
189189
y_resampled = y[mask]
190190

191-
self.logger.info('Under-sampling performed: {}'.format(Counter(
192-
y_resampled)))
191+
self.logger.info('Under-sampling performed: %s', Counter(
192+
y_resampled))
193193

194194
# If we need to offer support for the indices
195195
if self.return_indices:

imblearn/under_sampling/nearmiss.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -290,8 +290,8 @@ def _sample(self, X, y):
290290
X_resampled = np.concatenate((X_resampled, sel_x), axis=0)
291291
y_resampled = np.concatenate((y_resampled, sel_y), axis=0)
292292

293-
self.logger.info('Under-sampling performed: {}'.format(Counter(
294-
y_resampled)))
293+
self.logger.info('Under-sampling performed: %s', Counter(
294+
y_resampled))
295295

296296
# Check if the indices of the samples selected should be returned too
297297
if self.return_indices:

imblearn/under_sampling/neighbourhood_cleaning_rule.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,8 +167,8 @@ def _sample(self, X, y):
167167
X_resampled = np.concatenate((X_resampled, sel_x), axis=0)
168168
y_resampled = np.concatenate((y_resampled, sel_y), axis=0)
169169

170-
self.logger.info('Under-sampling performed: {}'.format(Counter(
171-
y_resampled)))
170+
self.logger.info('Under-sampling performed: %s', Counter(
171+
y_resampled))
172172

173173
# Check if the indices of the samples selected should be returned too
174174
if self.return_indices:

imblearn/under_sampling/one_sided_selection.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,8 +174,8 @@ def _sample(self, X, y):
174174
self.logger.debug('Looking for majority Tomek links ...')
175175
links = TomekLinks.is_tomek(y_resampled, nns, self.min_c_)
176176

177-
self.logger.info('Under-sampling performed: {}'.format(Counter(
178-
y_resampled[np.logical_not(links)])))
177+
self.logger.info('Under-sampling performed: %s', Counter(
178+
y_resampled[np.logical_not(links)]))
179179

180180
# Check if the indices of the samples selected should be returned too
181181
if self.return_indices:

imblearn/under_sampling/random_under_sampler.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,7 @@ def _sample(self, X, y):
130130
y_resampled = np.concatenate((y_resampled, y[y == key][indx]),
131131
axis=0)
132132

133-
self.logger.info("Under-sampling performed: {}".format(
134-
Counter(y_resampled)))
133+
self.logger.info('Under-sampling performed: %s', Counter(y_resampled))
135134

136135
# Check if the indices of the samples selected should be returned as
137136
# well

imblearn/under_sampling/tomek_links.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,8 @@ def _sample(self, X, y):
148148
self.logger.debug('Looking for majority Tomek links ...')
149149
links = self.is_tomek(y, nns, self.min_c_)
150150

151-
self.logger.info('Under-sampling performed: {}'.format(Counter(
152-
y[np.logical_not(links)])))
151+
self.logger.info('Under-sampling performed: %s', Counter(
152+
y[np.logical_not(links)]))
153153

154154
# Check if the indices of the samples selected should be returned too
155155
if self.return_indices:

Comments: 0 commit comments