From ab6a92b2de0a07aba03fcb4e0c26b9cfd67ec3ca Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sun, 7 Jun 2026 02:31:52 +0200 Subject: [PATCH] Use canonical regime-gated microimpute API --- src/microplex_us/pipelines/donor_imputers.py | 24 ++++++++++--------- .../test_regime_aware_donor_imputer.py | 17 +++++++++---- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/src/microplex_us/pipelines/donor_imputers.py b/src/microplex_us/pipelines/donor_imputers.py index 9241dc1..7bc0880 100644 --- a/src/microplex_us/pipelines/donor_imputers.py +++ b/src/microplex_us/pipelines/donor_imputers.py @@ -81,9 +81,9 @@ def fit( # v7 bug that blanked the negative tail of capital # gains, partnership income, farm income, etc. The # `!= 0` label is the minimal fix; the full upgrade to - # `microimpute.ZeroInflatedImputer` (regime-aware - # tripartite routing with separate positive / negative - # QRFs) is tracked as a follow-up. + # the canonical regime-aware `microimpute.Imputer` + # (tripartite routing with separate positive / negative + # QRFs) lives in `RegimeAwareDonorImputer`. zero_model = RandomForestClassifier( n_estimators=max(50, self.n_estimators // 2), random_state=42, @@ -146,9 +146,9 @@ def generate( class RegimeAwareDonorImputer: - """Donor imputer that wraps one chained `ZeroInflatedImputer` per block. + """Donor imputer that wraps one chained canonical `microimpute.Imputer` per block. 
- The whole target block is fit with one `ZeroInflatedImputer`, which + The whole target block is fit with one regime-gated `microimpute.Imputer`, which auto-detects one of seven regimes (THREE_SIGN / ZI_POSITIVE / ZI_NEGATIVE / SIGN_ONLY / POSITIVE_ONLY / NEGATIVE_ONLY / DEGENERATE_ZERO) for each target and composes a gate classifier + one or @@ -227,9 +227,9 @@ def fit( ) -> RegimeAwareDonorImputer: del weight_col, epochs, batch_size, learning_rate, verbose - if importlib.util.find_spec("microimpute.models.zero_inflated") is None: + if importlib.util.find_spec("microimpute.models.regime_gated") is None: raise ImportError( - "microimpute with microimpute.models.zero_inflated is required " + "microimpute with the canonical regime-gated Imputer is required " "for donor_imputer_backend='regime_aware'." ) if importlib.util.find_spec("quantile_forest") is None: @@ -237,8 +237,8 @@ def fit( "quantile-forest is required for the RegimeAwareDonorImputer base QRF." ) + from microimpute import Imputer as MicroImputer from microimpute.models.qrf import QRF - from microimpute.models.zero_inflated import ZeroInflatedImputer self._fitted = {} self._fitted_columns = () @@ -257,12 +257,13 @@ def fit( if len(subset) < 25: return self - wrapper = ZeroInflatedImputer( + wrapper = MicroImputer( base_imputer_class=self._configured_qrf_class(QRF), base_imputer_kwargs={}, classifier_type=self.classifier_type, - sequential=True, + signregime=True, seed=self.seed, + log_level="WARNING", ) fitted = wrapper.fit( subset, @@ -272,9 +273,10 @@ def fit( self._fitted_columns = target_vars self._predictor_columns = predictor_vars self._fitted = {column: fitted for column in self._fitted_columns} + regimes = getattr(fitted, "regimes_", getattr(wrapper, "_regimes", {})) self._regimes = { column: regime - for column, regime in getattr(wrapper, "_regimes", {}).items() + for column, regime in regimes.items() if column in target_set } return self diff --git 
a/tests/pipelines/test_regime_aware_donor_imputer.py b/tests/pipelines/test_regime_aware_donor_imputer.py index 1299df2..bacdbc4 100644 --- a/tests/pipelines/test_regime_aware_donor_imputer.py +++ b/tests/pipelines/test_regime_aware_donor_imputer.py @@ -7,8 +7,8 @@ in the interior band (``max(train_negatives)``, ``min(train_positives)``) — a region no real record occupies. -v9 upgrades to `microimpute.models.ZeroInflatedImputer`, which at fit -time auto-detects the three-sign regime per target and routes +v9 upgrades to canonical `microimpute.Imputer`, which at fit time +auto-detects the three-sign regime per target and routes predictions through separate positive and negative QRFs. The interior-band gap becomes a structural guarantee, not a statistical averaging hope. @@ -20,7 +20,7 @@ Tests pin: 1. The new backend value resolves through the factory to a donor - imputer that uses ZeroInflatedImputer internally. + imputer that uses canonical regime-gated microimpute internally. 2. On a three-sign training fixture, predictions preserve negatives (as v8's `y != 0` fix already does). 3. 
On the same fixture, predictions NEVER land in the interior band @@ -36,7 +36,9 @@ pytest.importorskip("quantile_forest") pytest.importorskip("microimpute") -pytest.importorskip("microimpute.models.zero_inflated") + +from microimpute import Imputer as CanonicalMicroImputer +from microimpute.models.regime_gated import REGIME_THREE_SIGN def _three_sign_frame_with_gap(n: int = 1500, seed: int = 0) -> pd.DataFrame: @@ -89,6 +91,11 @@ def _count_interior_violations( class TestRegimeAwareDonorImputerClassExists: """The new donor imputer must be importable from microplex_us.pipelines.us.""" + def test_canonical_microimpute_api_is_required(self) -> None: + imputer = CanonicalMicroImputer() + assert imputer.signregime is True + assert REGIME_THREE_SIGN == "THREE_SIGN" + def test_importable_from_us_module(self) -> None: from microplex_us.pipelines.us import RegimeAwareDonorImputer @@ -155,7 +162,7 @@ def fit(self, *args, **kwargs): assert captured["fit_kwargs"]["n_estimators"] == 7 assert captured["fit_kwargs"]["n_jobs"] == -1 - def test_multi_target_fit_uses_one_chained_zero_inflated_imputer(self) -> None: + def test_multi_target_fit_uses_one_chained_regime_gated_imputer(self) -> None: from microplex_us.pipelines.us import RegimeAwareDonorImputer rng = np.random.default_rng(20260606)