Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 13 additions & 11 deletions src/microplex_us/pipelines/donor_imputers.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,9 @@ def fit(
# v7 bug that blanked the negative tail of capital
# gains, partnership income, farm income, etc. The
# `!= 0` label is the minimal fix; the full upgrade to
# `microimpute.ZeroInflatedImputer` (regime-aware
# tripartite routing with separate positive / negative
# QRFs) is tracked as a follow-up.
# the canonical regime-aware `microimpute.Imputer`
# (tripartite routing with separate positive / negative
# QRFs) lives in `RegimeAwareDonorImputer`.
zero_model = RandomForestClassifier(
n_estimators=max(50, self.n_estimators // 2),
random_state=42,
Expand Down Expand Up @@ -146,9 +146,9 @@ def generate(


class RegimeAwareDonorImputer:
"""Donor imputer that wraps one chained `ZeroInflatedImputer` per block.
"""Donor imputer that wraps one chained canonical `microimpute.Imputer` per block.

The whole target block is fit with one `ZeroInflatedImputer`, which
The whole target block is fit with one regime-gated `microimpute.Imputer`, which
auto-detects one of seven regimes (THREE_SIGN / ZI_POSITIVE /
ZI_NEGATIVE / SIGN_ONLY / POSITIVE_ONLY / NEGATIVE_ONLY /
DEGENERATE_ZERO) for each target and composes a gate classifier + one or
Expand Down Expand Up @@ -227,18 +227,18 @@ def fit(
) -> RegimeAwareDonorImputer:
del weight_col, epochs, batch_size, learning_rate, verbose

if importlib.util.find_spec("microimpute.models.zero_inflated") is None:
if importlib.util.find_spec("microimpute.models.regime_gated") is None:
raise ImportError(
"microimpute with microimpute.models.zero_inflated is required "
"microimpute with the canonical regime-gated Imputer is required "
"for donor_imputer_backend='regime_aware'."
)
if importlib.util.find_spec("quantile_forest") is None:
raise ImportError(
"quantile-forest is required for the RegimeAwareDonorImputer base QRF."
)

from microimpute import Imputer as MicroImputer
from microimpute.models.qrf import QRF
from microimpute.models.zero_inflated import ZeroInflatedImputer

self._fitted = {}
self._fitted_columns = ()
Expand All @@ -257,12 +257,13 @@ def fit(
if len(subset) < 25:
return self

wrapper = ZeroInflatedImputer(
wrapper = MicroImputer(
base_imputer_class=self._configured_qrf_class(QRF),
base_imputer_kwargs={},
classifier_type=self.classifier_type,
sequential=True,
signregime=True,
seed=self.seed,
log_level="WARNING",
)
fitted = wrapper.fit(
subset,
Expand All @@ -272,9 +273,10 @@ def fit(
self._fitted_columns = target_vars
self._predictor_columns = predictor_vars
self._fitted = {column: fitted for column in self._fitted_columns}
regimes = getattr(fitted, "regimes_", getattr(wrapper, "_regimes", {}))
self._regimes = {
column: regime
for column, regime in getattr(wrapper, "_regimes", {}).items()
for column, regime in regimes.items()
if column in target_set
}
return self
Expand Down
17 changes: 12 additions & 5 deletions tests/pipelines/test_regime_aware_donor_imputer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
in the interior band (``max(train_negatives)``, ``min(train_positives)``)
— a region no real record occupies.

v9 upgrades to `microimpute.models.ZeroInflatedImputer`, which at fit
time auto-detects the three-sign regime per target and routes
v9 upgrades to canonical `microimpute.Imputer`, which at fit time
auto-detects the three-sign regime per target and routes
predictions through separate positive and negative QRFs. The
interior-band gap becomes a structural guarantee, not a statistical
averaging hope.
Expand All @@ -20,7 +20,7 @@
Tests pin:

1. The new backend value resolves through the factory to a donor
imputer that uses ZeroInflatedImputer internally.
imputer that uses canonical regime-gated microimpute internally.
2. On a three-sign training fixture, predictions preserve negatives
(as v8's `y != 0` fix already does).
3. On the same fixture, predictions NEVER land in the interior band
Expand All @@ -36,7 +36,9 @@

pytest.importorskip("quantile_forest")
pytest.importorskip("microimpute")
pytest.importorskip("microimpute.models.zero_inflated")

from microimpute import Imputer as CanonicalMicroImputer
from microimpute.models.regime_gated import REGIME_THREE_SIGN


def _three_sign_frame_with_gap(n: int = 1500, seed: int = 0) -> pd.DataFrame:
Expand Down Expand Up @@ -89,6 +91,11 @@ def _count_interior_violations(
class TestRegimeAwareDonorImputerClassExists:
"""The new donor imputer must be importable from microplex_us.pipelines.us."""

def test_canonical_microimpute_api_is_required(self) -> None:
# Pin the canonical microimpute surface this suite depends on: asserts
# that a default-constructed `Imputer` exposes `signregime` as True and
# that the three-sign regime constant equals the string "THREE_SIGN".
imputer = CanonicalMicroImputer()
assert imputer.signregime is True
assert REGIME_THREE_SIGN == "THREE_SIGN"

def test_importable_from_us_module(self) -> None:
from microplex_us.pipelines.us import RegimeAwareDonorImputer

Expand Down Expand Up @@ -155,7 +162,7 @@ def fit(self, *args, **kwargs):
assert captured["fit_kwargs"]["n_estimators"] == 7
assert captured["fit_kwargs"]["n_jobs"] == -1

def test_multi_target_fit_uses_one_chained_zero_inflated_imputer(self) -> None:
def test_multi_target_fit_uses_one_chained_regime_gated_imputer(self) -> None:
from microplex_us.pipelines.us import RegimeAwareDonorImputer

rng = np.random.default_rng(20260606)
Expand Down
Loading