From 248ad256542d3c3a1587e5af5e8dc4849461cbbb Mon Sep 17 00:00:00 2001 From: Paul Sangrey Date: Thu, 22 May 2025 14:16:28 -0400 Subject: [PATCH 1/3] Updated the representation file to handle obs_intercept with a time index. --- pymc_extras/statespace/core/representation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pymc_extras/statespace/core/representation.py b/pymc_extras/statespace/core/representation.py index 450108d38..f563ca861 100644 --- a/pymc_extras/statespace/core/representation.py +++ b/pymc_extras/statespace/core/representation.py @@ -298,7 +298,7 @@ def _validate_matrix_shape(self, name: str, X: np.ndarray | pt.TensorVariable) - ) # Time varying vector case, check only the static shapes - if X.ndim == 2 and X.shape[1:] != expected_shape: + if X.ndim == 2 and shape[1:] != expected_shape: raise ValueError( f"Last dimension of array provided for {name} has shape {X.shape[1]}, " f"expected {expected_shape}" @@ -334,7 +334,7 @@ def _check_provided_tensor(self, name: str, X: pt.TensorVariable) -> pt.TensorVa X = pt.expand_dims(X, (0,)) X = pt.specify_shape(X, self.shapes[name]) - elif X.ndim == 2: + elif X.ndim == 2 and name not in VECTOR_VALUED: X = pt.expand_dims(X, (0,)) X = pt.specify_shape(X, self.shapes[name]) From 3ea0f4c056f646a4bdb94f1922760ea8ec6320ae Mon Sep 17 00:00:00 2001 From: Paul Sangrey Date: Fri, 30 May 2025 11:53:24 -0400 Subject: [PATCH 2/3] Brought the attempt to make structural.py work with multivariate time series. --- pymc_extras/statespace/models/structural.py | 135 ++++++++++++++++---- 1 file changed, 111 insertions(+), 24 deletions(-) diff --git a/pymc_extras/statespace/models/structural.py b/pymc_extras/statespace/models/structural.py index a982366c3..f4bc4115e 100644 --- a/pymc_extras/statespace/models/structural.py +++ b/pymc_extras/statespace/models/structural.py @@ -78,6 +78,7 @@ def __init__( component_info: dict[str, dict[str, Any]], measurement_error: bool, name_to_variable: dict[str, Variable], + endog_names: list[str] | None = None, name_to_data: dict[str, Variable] | None = None, name: str | None = None, verbose: bool = True, @@ -88,6 +89,7 @@ def __init__( if name is None: name = "data" self._name = name + self._endog_names = endog_names if endog_names is not None else [name] k_states, k_posdef, k_endog = ssm.k_states, ssm.k_posdef, ssm.k_endog param_names, param_dims, param_info = self._add_inital_state_cov_to_properties( @@ -165,7 +167,7 @@ def state_names(self): @property def observed_states(self): - return [self._name] + return self._endog_names @property def shock_names(self): @@ -351,6 +353,7 @@ def __init__( k_endog, k_states, k_posdef, + endog_names=None: state_names=None, data_names=None, shock_names=None, @@ -367,6 +370,7 @@ def __init__( self.k_states = k_states self.k_posdef = k_posdef self.measurement_error = measurement_error + self.endog_names = [self.name] if endog_names is None else endog_names self.state_names = state_names if state_names is not None else [] self.data_names = data_names if data_names is not None else [] @@ -597,6 +601,10 @@ def _make_combined_name(self): return name def __add__(self, other): + + if self.endog_names != other.endog_names: + raise ValueError("The endog names must be the same.") + state_names = self._combine_property(other, "state_names") data_names = self._combine_property(other, "data_names") param_names = self._combine_property(other, "param_names") @@ -617,7 +625,8 @@ def __add__(self, other): new_comp = Component( name="", - k_endog=1, + endog_names=self.endog_names, + k_endog=k_endog, k_states=k_states, k_posdef=k_posdef, measurement_error=measurement_error, @@ -682,6 +691,7 @@ def build( return StructuralTimeSeries( self.ssm, name=name, + endog_names=self.endog_names, state_names=self.state_names, data_names=self.data_names, shock_names=self.shock_names, @@ -806,9 +816,11 @@ class LevelTrendComponent(Component): def __init__( self, + endog_names, order: int | list[int] = 2, innovations_order: int | list[int] | None = None, name: str = "LevelTrend", + **kwargs, ): if innovations_order is None: innovations_order = order @@ -840,14 +852,25 @@ def __init__( super().__init__( name, - k_endog=1, + endog_names=endog_names, + k_endog=len(endog_names), k_states=k_states, k_posdef=k_posdef, measurement_error=False, combine_hidden_states=False, obs_state_idxs=np.array([1.0] + [0.0] * (k_states - 1)), + **kwargs ) + @property + def observed_states(self): + return self.endog_names + + @property + def k_endog(self): + return len(self.endog_names) + + def populate_component_properties(self): name_slice = POSITION_DERIVATIVE_NAMES[: self.k_states] self.param_names = ["initial_trend"] @@ -878,7 +901,7 @@ def make_symbolic_graph(self) -> None: R = R[:, self.innovations_order] self.ssm["selection", :, :] = R - self.ssm["design", 0, :] = np.array([1.0] + [0.0] * (self.k_states - 1)) + self.ssm["design", :, :] = np.tile(np.array([1.0] + [0.0] * (self.k_states - 1)), (self.k_endog, 1)) if self.k_posdef > 0: sigma_trend = self.make_and_register_variable("sigma_trend", shape=(self.k_posdef,)) @@ -925,32 +948,43 @@ class MeasurementError(Component): idata = pm.sample(nuts_sampler='numpyro') """ - def __init__(self, name: str = "MeasurementError"): - k_endog = 1 + def __init__(self, endog_names, time_dim, name: str = "MeasurementError", **kwargs): + k_states = 0 k_posdef = 0 + self._time_dim = time_dim super().__init__( - name, k_endog, k_states, k_posdef, measurement_error=True, combine_hidden_states=False + name, + endog_names=endog_names, + k_endog=len(endog_names), + k_states, + k_posdef, + measurement_error=True, + combine_hidden_states=False, + **kwargs, ) + def populate_component_properties(self): - self.param_names = [f"sigma_{self.name}"] - self.param_dims = {} self.param_info = { - f"sigma_{self.name}": { - "shape": (), - "constraints": "Positive", - "dims": None, + f"{self.name}_covariance": { + "shape": (self.k_endog, self.k_endog), + "constraints": "Positive semi-definite", + "dims": ( OBS_STATE_DIM, OBS_STATE_AUX_DIM,), } } + self.param_names = list(self.param_info.keys()) + self.param_dims = {name: val["dims"] for name, val in self.param_info.items()} def make_symbolic_graph(self) -> None: - sigma_shape = () - error_sigma = self.make_and_register_variable(f"sigma_{self.name}", shape=sigma_shape) - diag_idx = np.diag_indices(self.k_endog) - idx = np.s_["obs_cov", diag_idx[0], diag_idx[1]] - self.ssm[idx] = error_sigma**2 + + sigma_shape = (self.k_endog, self.k_endog) + obs_covariance = self.make_and_register_variable( + f"{self.name}_covariance", shape=sigma_shape + ) + + self.ssm["obs_cov"] = obs_covariance class AutoregressiveComponent(Component): @@ -1009,7 +1043,7 @@ class AutoregressiveComponent(Component): """ - def __init__(self, order: int = 1, name: str = "AutoRegressive"): + def __init__( self, endog_names, order: int = 1, name: str = "AutoRegressive"): order = order_to_mask(order) ar_lags = np.flatnonzero(order).ravel().astype(int) + 1 k_states = len(order) @@ -1019,7 +1053,8 @@ def __init__(self, order: int = 1, name: str = "AutoRegressive"): super().__init__( name=name, - k_endog=1, + endog_names=endog_names, + k_endog=len(endog_names), k_states=k_states, k_posdef=1, measurement_error=True, @@ -1051,7 +1086,8 @@ def make_symbolic_graph(self) -> None: T = np.eye(self.k_states, k=-1) self.ssm["transition", :, :] = T self.ssm["selection", 0, 0] = 1 - self.ssm["design", 0, 0] = 1 + # We want all the endogenous variables to load on this + self.ssm["design", :, 0] = 1 ar_idx = ("transition", np.zeros(k_nonzero, dtype="int"), np.nonzero(self.order)[0]) self.ssm[ar_idx] = ar_params @@ -1315,7 +1351,9 @@ class FrequencySeasonality(Component): to isolate and identify a "Monday" effect, for instance. """ - def __init__(self, season_length, n=None, name=None, innovations=True): + def __init__( + self, endog_names, season_length, n=None, name=None, innovations=True, **kwargs + ): if n is None: n = int(season_length // 2) if name is None: @@ -1337,16 +1375,18 @@ def __init__(self, season_length, n=None, name=None, innovations=True): super().__init__( name=name, - k_endog=1, + endog_names=endog_names, + k_endog=len(endog_names), k_states=k_states, k_posdef=k_states * int(self.innovations), measurement_error=False, combine_hidden_states=True, obs_state_idxs=obs_state_idx, + **kwargs ) def make_symbolic_graph(self) -> None: - self.ssm["design", 0, slice(0, self.k_states, 2)] = 1 + self.ssm["design", :, slice(0, self.k_states, 2)] = 1 init_state = self.make_and_register_variable(f"{self.name}", shape=(self.n_coefs,)) @@ -1677,3 +1717,50 @@ def populate_component_properties(self) -> None: "constraints": "Positive", "dims": ("exog_state",), } + +class ObsIntercept(Component): + + + def __init__(self, endog_names, time_dim, name: str = "y"): + + self._time_dim = time_dim + super().__init__( + name=name, + endog_names=endog_names, + k_endog=len(endog_names), + k_states=0, + k_posdef=0, + measurement_error=False, + combine_hidden_states=False, + ) + + def populate_component_properties(self): + + self.param_info = { + f"mu": { + "shape": ( + # TODO This should not be necessary. + self._time_dim, + self.k_endog, + ), + "constraints": None, + "dims": ( + TIME_DIM, + OBS_STATE_DIM, + ), + }, + } + self.param_dims = {name: val["dims"] for name, val in self.param_info.items()} + self.param_names = list(self.param_info.keys()) + + def make_symbolic_graph(self) -> None: + + mu = self.make_and_register_variable( + f"mu", + shape=( + self._time_dim, + self.k_endog, + ), + ) + + self.ssm["obs_intercept"] = mu From 93b244c553dae38e8894e9c549c9c807a1331144 Mon Sep 17 00:00:00 2001 From: Paul Sangrey Date: Fri, 30 May 2025 12:10:05 -0400 Subject: [PATCH 3/3] Fixed a few other minor syntax errors. --- pymc_extras/statespace/models/structural.py | 42 ++++++++++----------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/pymc_extras/statespace/models/structural.py b/pymc_extras/statespace/models/structural.py index f4bc4115e..c7b52d893 100644 --- a/pymc_extras/statespace/models/structural.py +++ b/pymc_extras/statespace/models/structural.py @@ -25,6 +25,8 @@ ALL_STATE_DIM, AR_PARAM_DIM, LONG_MATRIX_NAMES, + OBS_STATE_AUX_DIM, + OBS_STATE_DIM, POSITION_DERIVATIVE_NAMES, TIME_DIM, ) @@ -353,7 +355,7 @@ def __init__( k_endog, k_states, k_posdef, - endog_names=None: + endog_names=None, state_names=None, data_names=None, shock_names=None, @@ -601,7 +603,6 @@ def _make_combined_name(self): return name def __add__(self, other): - if self.endog_names != other.endog_names: raise ValueError("The endog names must be the same.") @@ -859,7 +860,7 @@ def __init__( measurement_error=False, combine_hidden_states=False, obs_state_idxs=np.array([1.0] + [0.0] * (k_states - 1)), - **kwargs + **kwargs, ) @property @@ -870,7 +871,6 @@ def observed_states(self): def k_endog(self): return len(self.endog_names) - def populate_component_properties(self): name_slice = POSITION_DERIVATIVE_NAMES[: self.k_states] self.param_names = ["initial_trend"] @@ -901,7 +901,9 @@ def make_symbolic_graph(self) -> None: R = R[:, self.innovations_order] self.ssm["selection", :, :] = R - self.ssm["design", :, :] = np.tile(np.array([1.0] + [0.0] * (self.k_states - 1)), (self.k_endog, 1)) + self.ssm["design", :, :] = np.tile( + np.array([1.0] + [0.0] * (self.k_states - 1)), (self.k_endog, 1) + ) if self.k_posdef > 0: sigma_trend = self.make_and_register_variable("sigma_trend", shape=(self.k_posdef,)) @@ -949,36 +951,36 @@ class MeasurementError(Component): """ def __init__(self, endog_names, time_dim, name: str = "MeasurementError", **kwargs): - k_states = 0 k_posdef = 0 self._time_dim = time_dim super().__init__( name, - endog_names=endog_names, - k_endog=len(endog_names), k_states, k_posdef, + endog_names=endog_names, + k_endog=len(endog_names), measurement_error=True, combine_hidden_states=False, **kwargs, ) - def populate_component_properties(self): self.param_info = { f"{self.name}_covariance": { "shape": (self.k_endog, self.k_endog), "constraints": "Positive semi-definite", - "dims": ( OBS_STATE_DIM, OBS_STATE_AUX_DIM,), + "dims": ( + OBS_STATE_DIM, + OBS_STATE_AUX_DIM, + ), } } self.param_names = list(self.param_info.keys()) self.param_dims = {name: val["dims"] for name, val in self.param_info.items()} def make_symbolic_graph(self) -> None: - sigma_shape = (self.k_endog, self.k_endog) obs_covariance = self.make_and_register_variable( f"{self.name}_covariance", shape=sigma_shape @@ -1043,7 +1045,7 @@ class AutoregressiveComponent(Component): """ - def __init__( self, endog_names, order: int = 1, name: str = "AutoRegressive"): + def __init__(self, endog_names, order: int = 1, name: str = "AutoRegressive"): order = order_to_mask(order) ar_lags = np.flatnonzero(order).ravel().astype(int) + 1 k_states = len(order) @@ -1351,9 +1353,7 @@ class FrequencySeasonality(Component): to isolate and identify a "Monday" effect, for instance. """ - def __init__( - self, endog_names, season_length, n=None, name=None, innovations=True, **kwargs - ): + def __init__(self, endog_names, season_length, n=None, name=None, innovations=True, **kwargs): if n is None: n = int(season_length // 2) if name is None: @@ -1382,7 +1382,7 @@ def __init__( measurement_error=False, combine_hidden_states=True, obs_state_idxs=obs_state_idx, - **kwargs + **kwargs, ) def make_symbolic_graph(self) -> None: @@ -1718,11 +1718,9 @@ def populate_component_properties(self) -> None: "dims": ("exog_state",), } -class ObsIntercept(Component): - +class ObsIntercept(Component): def __init__(self, endog_names, time_dim, name: str = "y"): - self._time_dim = time_dim super().__init__( name=name, @@ -1735,9 +1733,8 @@ def __init__(self, endog_names, time_dim, name: str = "y"): ) def populate_component_properties(self): - self.param_info = { - f"mu": { + "mu": { "shape": ( # TODO This should not be necessary. self._time_dim, @@ -1754,9 +1751,8 @@ def populate_component_properties(self): self.param_names = list(self.param_info.keys()) def make_symbolic_graph(self) -> None: - mu = self.make_and_register_variable( - f"mu", + "mu", shape=( self._time_dim, self.k_endog,