Skip to content

[feature] #1191 `is_trained` property to indicate the state of the model. #1232

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: development
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
- Add logger information on handling of stopIteration error (#960)
- Replace deprecated ConfigSpace methods (#1139)
- Separated Wallclock time measurements from CPU time measurements and storing them under new 'cpu_time' variable (#1173)
- Adapt RunHistory to be human readable (# 1174)
- Adapt RunHistory to be human readable (#1174)
- The models have a `is_trained` property to indicate the internal state (#1191)

## Dependencies
- Allow numpy >= 2.x (#1146)
Expand Down
6 changes: 6 additions & 0 deletions smac/model/abstract_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def __init__(
self._rng = np.random.RandomState(self._seed)
self._instance_features = instance_features
self._pca_components = pca_components
self._is_trained = False

n_features = 0
if self._instance_features is not None:
Expand Down Expand Up @@ -92,6 +93,11 @@ def meta(self) -> dict[str, Any]:
"pca_components": self._pca_components,
}

@property
def is_trained(self) -> bool:
"""Returns True if the model is trained, False otherwise."""
return self._is_trained

def train(self: Self, X: np.ndarray, Y: np.ndarray) -> Self:
"""Trains the random forest on X and Y. Internally, calls the method `_train`.

Expand Down
5 changes: 4 additions & 1 deletion smac/model/gaussian_process/mcmc_gaussian_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,10 @@ def _train(
model.mean_y_ = self.mean_y_
model.std_y_ = self.std_y_

self._is_trained = True
if not self._models:
self._is_trained = False
else:
self._is_trained = all(model.is_trained for model in self._models)
return self

def _get_gaussian_process(self) -> GaussianProcessRegressor:
Expand Down
7 changes: 7 additions & 0 deletions smac/model/multi_objective_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ def __init__(
seed=seed,
)

@property
def is_trained(self) -> bool:
"""Whether the model is trained or not."""
return self._is_trained

@property
def models(self) -> list[AbstractModel]:
"""The internally used surrogate models."""
Expand All @@ -76,6 +81,8 @@ def _train(self: Self, X: np.ndarray, Y: np.ndarray) -> Self:
for i, model in enumerate(self._models):
model.train(X, Y[:, i])

self._is_trained = all(model.is_trained for model in self._models)

return self

def _predict(
Expand Down
2 changes: 2 additions & 0 deletions smac/model/random_forest/random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ def _train(self, X: np.ndarray, y: np.ndarray) -> RandomForest:
data = self._init_data_container(X, y)
self._rf.fit(data, rng=self._rng)

self._is_trained = True

return self

def _init_data_container(self, X: np.ndarray, y: np.ndarray) -> DataContainer:
Expand Down
10 changes: 10 additions & 0 deletions tests/test_model/test_gp.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,16 @@ def test_predict():
assert m_hat.shape == (10, 1)
assert v_hat.shape == (10, 1)

def test_is_trained():
seed = 1
rs = np.random.RandomState(seed)
X, Y, n_dims = get_cont_data(rs)

model = get_gp(n_dims, seed)
assert not model.is_trained
model._train(X[:10], Y[:10], optimize_hyperparameters=False)
assert model.is_trained


def test_train_do_optimize():
# Check that do_optimize does not mess with the kernel hyperparameters given to the Gaussian process!
Expand Down
2 changes: 2 additions & 0 deletions tests/test_model/test_gp_mcmc.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,10 @@ def test_gp_train():
fixture = np.array([0.693147, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -6.907755])

model = get_gp(10, seed)
assert not model.is_trained
np.testing.assert_array_almost_equal(model._kernel.theta, fixture)
model.train(X[:10], Y[:10])
assert model.is_trained
assert len(model.models) == 36

for base_model in model.models:
Expand Down
2 changes: 2 additions & 0 deletions tests/test_model/test_rf.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ def test_predict():
X = rs.rand(20, 10)
Y = rs.rand(10, 1)
model = RandomForest(configspace=_get_cs(10))
assert not model.is_trained
model.train(X[:10], Y[:10])
assert model.is_trained
m_hat, v_hat = model.predict(X[10:])
assert m_hat.shape == (10, 1)
assert v_hat.shape == (10, 1)
Expand Down
Loading