From 0c4b2433006c493fb2dae508bef47eef97b9c031 Mon Sep 17 00:00:00 2001
From: Saves Paul <paul.saves@alumni.enac.fr>
Date: Thu, 15 Feb 2024 14:26:52 +0100
Subject: [PATCH] fix bug for evaluating noise automatically for redundants
 points with noisy output when the inputs are mixed categorical (#509)

* cleaning X,y and standardization management in krg_based
---
 smt/applications/tests/test_mixed_integer.py | 76 +++++++++++++++++++
 smt/surrogate_models/krg_based.py            | 80 +++++++++++---------
 2 files changed, 120 insertions(+), 36 deletions(-)

diff --git a/smt/applications/tests/test_mixed_integer.py b/smt/applications/tests/test_mixed_integer.py
index 94cdf57d1..022de35d4 100644
--- a/smt/applications/tests/test_mixed_integer.py
+++ b/smt/applications/tests/test_mixed_integer.py
@@ -1553,6 +1553,82 @@ def test_mixed_homo_hyp_3D_PLS_cate(self):
         self.assertTrue((np.abs(np.sum(np.array(sm.predict_values(xt) - yt)))) < 1e-6)
         self.assertTrue((np.abs(np.sum(np.array(sm.predict_variances(xt) - 0)))) < 1e-6)
 
+    def test_compound_hetero_noise_auto(self):
+        xt1 = np.array([[0, 0.0], [0, 2.0], [0, 4.0]])
+        xt2 = np.array([[1, 0.0], [1, 2.0], [1, 3.0]])
+        xt3 = np.array([[2, 1.0], [2, 2.0], [2, 4.0]])
+
+        xt = np.concatenate((xt1, xt2, xt3), axis=0)
+        xt[:, 1] = xt[:, 1].astype(np.float64)
+        yt1 = np.array([0.0, 9.0, 16.0])
+        yt2 = np.array([0.0, -4, -13.0])
+        yt3 = np.array([-10, 3, 11.0])
+        yt = np.concatenate((yt1, yt2, yt3), axis=0)
+
+        design_space = DesignSpace(
+            [
+                CategoricalVariable(["Blue", "Red", "Green"]),
+                # OrdinalVariable([0,1,2]),
+                FloatVariable(0, 4),
+            ]
+        )
+
+        # Surrogate
+        sm = MixedIntegerKrigingModel(
+            surrogate=KRG(
+                design_space=design_space,
+                categorical_kernel=MixIntKernelType.COMPOUND_SYMMETRY,
+                theta0=[1e-1],
+                corr="squar_exp",
+                n_start=20,
+                eval_noise=True,
+                #     noise0 = [1.0,0.1,10,0.01,1.0,0.1,10,0.01,10],
+                use_het_noise=True,
+                hyper_opt="Cobyla",
+            ),
+        )
+        sm.set_training_values(xt, yt)
+        sm.train()
+
+        # DOE for validation
+        n = 100
+        x_cat1 = []
+        x_cat2 = []
+        x_cat3 = []
+
+        for i in range(n):
+            x_cat1.append(0)
+            x_cat2.append(1)
+            x_cat3.append(2)
+
+        x_cont = np.linspace(0.0, 4.0, n)
+        x1 = np.concatenate(
+            (np.asarray(x_cat1).reshape(-1, 1), x_cont.reshape(-1, 1)), axis=1
+        )
+        x2 = np.concatenate(
+            (np.asarray(x_cat2).reshape(-1, 1), x_cont.reshape(-1, 1)), axis=1
+        )
+        x3 = np.concatenate(
+            (np.asarray(x_cat3).reshape(-1, 1), x_cont.reshape(-1, 1)), axis=1
+        )
+
+        y1 = sm.predict_values(x1)
+        y2 = sm.predict_values(x2)
+        y3 = sm.predict_values(x3)
+
+        # estimated variance
+        s2_1 = sm.predict_variances(x1)
+        s2_2 = sm.predict_variances(x2)
+        s2_3 = sm.predict_variances(x3)
+
+        self.assertEqual(
+            np.array_equal(
+                np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
+                sm._surrogate.optimal_noise,
+            ),
+            True,
+        )
+
     def test_mixed_homo_gaussian_3D_ord_cate(self):
         xt = np.array([[0.5, 0, 5], [2, 3, 4], [5, 2, -1], [-2, 4, 0.5]])
         yt = np.array([[0.0], [3], [1.0], [1.5]])
diff --git a/smt/surrogate_models/krg_based.py b/smt/surrogate_models/krg_based.py
index d3e481986..cf934f1ed 100644
--- a/smt/surrogate_models/krg_based.py
+++ b/smt/surrogate_models/krg_based.py
@@ -375,8 +375,42 @@ def _new_train(self):
         self._corr_params = None
         _, self.cat_features = compute_X_cont(self.X_train, self.design_space)
         D = None  # For SGP, D is not computed at all
+        # Center and scale X and y
+        (
+            self.X_norma,
+            self.y_norma,
+            self.X_offset,
+            self.y_mean,
+            self.X_scale,
+            self.y_std,
+        ) = standardization(X.copy(), y.copy())
+
+        if not self.options["eval_noise"]:
+            self.optimal_noise = np.array(self.options["noise0"])
+        elif self.options["use_het_noise"]:
+            # hetGP works with unique design variables when noise variance are not given
+            (
+                self.X_norma,
+                index_unique,
+                nt_reps,
+            ) = np.unique(self.X_norma, return_inverse=True, return_counts=True, axis=0)
+            self.nt = self.X_norma.shape[0]
+
+            # computing the mean of the output per unique design variable (see Binois et al., 2018)
+            y_norma_unique = []
+            for i in range(self.nt):
+                y_norma_unique.append(np.mean(self.y_norma[index_unique == i]))
+            # pointwise sensible estimates of the noise variances (see Ankenman et al., 2010)
+            self.optimal_noise = self.options["noise0"] * np.ones(self.nt)
+            for i in range(self.nt):
+                diff = self.y_norma[index_unique == i] - y_norma_unique[i]
+                if np.sum(diff**2) != 0.0:
+                    self.optimal_noise[i] = np.std(diff, ddof=1) ** 2
+            self.optimal_noise = self.optimal_noise / nt_reps
+            self.y_norma = y_norma_unique
+
         if not (self.is_continuous):
-            D, self.ij, X = gower_componentwise_distances(
+            D, self.ij, X_cont = gower_componentwise_distances(
                 X=X,
                 x_is_acting=is_acting,
                 design_space=self.design_space,
@@ -397,7 +431,7 @@ def _new_train(self):
                     mixint_type=MixIntKernelType.GOWER,
                 )
             if self.options["categorical_kernel"] == MixIntKernelType.CONT_RELAX:
-                X2, _ = self.design_space.unfold_x(self.training_points[None][0][0])
+                X2, _ = self.design_space.unfold_x(X)
                 (
                     self.X2_norma,
                     _,
@@ -405,7 +439,7 @@ def _new_train(self):
                     _,
                     self.X2_scale,
                     _,
-                ) = standardization(X2, self.training_points[None][0][1])
+                ) = standardization(X2.copy(), y.copy())
                 D, _ = cross_distances(self.X2_norma)
                 self.Lij, self.n_levels = cross_levels(
                     X=self.X_train, ij=self.ij, design_space=self.design_space
@@ -422,39 +456,15 @@ def _new_train(self):
                         mixint_type=MixIntKernelType.CONT_RELAX,
                     )
 
-        # Center and scale X and y
-        (
-            self.X_norma,
-            self.y_norma,
-            self.X_offset,
-            self.y_mean,
-            self.X_scale,
-            self.y_std,
-        ) = standardization(X, y)
-
-        if not self.options["eval_noise"]:
-            self.optimal_noise = np.array(self.options["noise0"])
-        elif self.options["use_het_noise"]:
-            # hetGP works with unique design variables when noise variance are not given
+            # Center and scale X_cont and y
             (
                 self.X_norma,
-                index_unique,
-                nt_reps,
-            ) = np.unique(self.X_norma, return_inverse=True, return_counts=True, axis=0)
-            self.nt = self.X_norma.shape[0]
-
-            # computing the mean of the output per unique design variable (see Binois et al., 2018)
-            y_norma_unique = []
-            for i in range(self.nt):
-                y_norma_unique.append(np.mean(self.y_norma[index_unique == i]))
-            # pointwise sensible estimates of the noise variances (see Ankenman et al., 2010)
-            self.optimal_noise = self.options["noise0"] * np.ones(self.nt)
-            for i in range(self.nt):
-                diff = self.y_norma[index_unique == i] - y_norma_unique[i]
-                if np.sum(diff**2) != 0.0:
-                    self.optimal_noise[i] = np.std(diff, ddof=1) ** 2
-            self.optimal_noise = self.optimal_noise / nt_reps
-            self.y_norma = y_norma_unique
+                self.y_norma,
+                self.X_offset,
+                self.y_mean,
+                self.X_scale,
+                self.y_std,
+            ) = standardization(X_cont.copy(), y.copy())
 
         if self.name not in ["SGP"]:
             if self.is_continuous:
@@ -871,7 +881,6 @@ def _reduced_likelihood_function(self, theta):
             noise = tmp_var[-1]
         if not (self.is_continuous):
             dx = self.D
-
             if self.options["categorical_kernel"] == MixIntKernelType.CONT_RELAX:
                 if "MFK" in self.name:
                     if (
@@ -920,7 +929,6 @@ def _reduced_likelihood_function(self, theta):
             r = self._correlation_types[self.options["corr"]](theta, self.D).reshape(
                 -1, 1
             )
-
         R = np.eye(self.nt) * (1.0 + nugget + noise)
         R[self.ij[:, 0], self.ij[:, 1]] = r[:, 0]
         R[self.ij[:, 1], self.ij[:, 0]] = r[:, 0]