Skip to content

[feature] #1191 `is_trained` property to indicate the state of the model. #1232

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: development
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
- Add logger information on handling of stopIteration error (#960)
- Replace deprecated ConfigSpace methods (#1139)
- Separated Wallclock time measurements from CPU time measurements and storing them under new 'cpu_time' variable (#1173)
- Adapt RunHistory to be human readable (# 1174)
- Adapt RunHistory to be human readable (#1174)
- The models have a `is_trained` property to indicate the internal state (#1191)

## Dependencies
- Allow numpy >= 2.x (#1146)
Expand Down
6 changes: 6 additions & 0 deletions smac/model/abstract_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def __init__(
self._rng = np.random.RandomState(self._seed)
self._instance_features = instance_features
self._pca_components = pca_components
self._is_trained = False

n_features = 0
if self._instance_features is not None:
Expand Down Expand Up @@ -92,6 +93,11 @@ def meta(self) -> dict[str, Any]:
"pca_components": self._pca_components,
}

@property
def is_trained(self) -> bool:
"""Returns True if the model is trained, False otherwise."""
return self._is_trained

def train(self: Self, X: np.ndarray, Y: np.ndarray) -> Self:
"""Trains the random forest on X and Y. Internally, calls the method `_train`.

Expand Down
5 changes: 4 additions & 1 deletion smac/model/gaussian_process/mcmc_gaussian_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,10 @@ def _train(
model.mean_y_ = self.mean_y_
model.std_y_ = self.std_y_

self._is_trained = True
if not self._models:
self._is_trained = False
else:
self._is_trained = all(model.is_trained for model in self._models)
return self

def _get_gaussian_process(self) -> GaussianProcessRegressor:
Expand Down
7 changes: 7 additions & 0 deletions smac/model/multi_objective_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ def __init__(
seed=seed,
)

@property
def is_trained(self) -> bool:
"""Whether the model is trained or not."""
return self._is_trained

@property
def models(self) -> list[AbstractModel]:
"""The internally used surrogate models."""
Expand All @@ -76,6 +81,8 @@ def _train(self: Self, X: np.ndarray, Y: np.ndarray) -> Self:
for i, model in enumerate(self._models):
model.train(X, Y[:, i])

self._is_trained = all(model.is_trained for model in self._models)

return self

def _predict(
Expand Down
2 changes: 2 additions & 0 deletions smac/model/random_forest/random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ def _train(self, X: np.ndarray, y: np.ndarray) -> RandomForest:
data = self._init_data_container(X, y)
self._rf.fit(data, rng=self._rng)

self._is_trained = True

return self

def _init_data_container(self, X: np.ndarray, y: np.ndarray) -> DataContainer:
Expand Down
10 changes: 10 additions & 0 deletions tests/test_model/test_gp.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,16 @@ def test_predict():
assert m_hat.shape == (10, 1)
assert v_hat.shape == (10, 1)

def test_is_trained():
seed = 1
rs = np.random.RandomState(seed)
X, Y, n_dims = get_cont_data(rs)

model = get_gp(n_dims, seed)
assert not model.is_trained
model._train(X[:10], Y[:10], optimize_hyperparameters=False)
assert model.is_trained


def test_train_do_optimize():
# Check that do_optimize does not mess with the kernel hyperparameters given to the Gaussian process!
Expand Down
2 changes: 2 additions & 0 deletions tests/test_model/test_gp_mcmc.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,10 @@ def test_gp_train():
fixture = np.array([0.693147, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -6.907755])

model = get_gp(10, seed)
assert not model.is_trained
np.testing.assert_array_almost_equal(model._kernel.theta, fixture)
model.train(X[:10], Y[:10])
assert model.is_trained
assert len(model.models) == 36

for base_model in model.models:
Expand Down
2 changes: 2 additions & 0 deletions tests/test_model/test_rf.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ def test_predict():
X = rs.rand(20, 10)
Y = rs.rand(10, 1)
model = RandomForest(configspace=_get_cs(10))
assert not model.is_trained
model.train(X[:10], Y[:10])
assert model.is_trained
m_hat, v_hat = model.predict(X[10:])
assert m_hat.shape == (10, 1)
assert v_hat.shape == (10, 1)
Expand Down
Loading