Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ authors = [
]
requires-python = ">=3.13"
dependencies = [
"microplex[calibrate] @ git+https://github.com/PolicyEngine/microplex.git@1e0627182f9df40aacd7043c96956c2895bf9d30",
"microplex[calibrate] @ git+https://github.com/PolicyEngine/microplex.git@90f21d2b2048ed810cde9240f8d03d5bfc1565fc",
"duckdb>=1.2",
"h5py>=3.10",
"requests>=2.31",
Expand All @@ -35,7 +35,7 @@ hf = [
"huggingface_hub>=0.24",
]
policyengine = [
"microimpute==3.1.1; python_full_version >= '3.12' and python_full_version < '3.15'",
"microimpute @ git+https://github.com/PolicyEngine/microimpute.git@90be828eb442c48ee86bb91bb83a75da4b0f0f89 ; python_full_version >= '3.12' and python_full_version < '3.15'",
"policyengine-us==1.715.2; python_version >= '3.11' and python_version < '3.15'",
"spm-calculator>=0.3.1",
# Standalone tax-unit construction engine (the extraction of eCPS's
Expand Down Expand Up @@ -85,6 +85,7 @@ allow-direct-references = true
[tool.hatch.build.targets.wheel.force-include]
"src/microplex_us/pipelines/pe_native_scores.py" = "microplex_us/pipelines/pe_native_scores.py"
"src/microplex_us/pipelines/ecps_export_contract.json" = "microplex_us/pipelines/ecps_export_contract.json"
"src/microplex_us/specs/us-2024.yaml" = "microplex_us/specs/us-2024.yaml"

[tool.pytest.ini_options]
testpaths = ["tests"]
Expand Down
1 change: 1 addition & 0 deletions src/microplex_us/specs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Packaged Microplex-US declarative specs."""
261 changes: 261 additions & 0 deletions src/microplex_us/specs/us-2024.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
# Spec metadata: country code, model year, and the PolicyEngine model name.
meta:
country: us
model_year: 2024
policyengine_model: policyengine-us

# Named input datasets. cps_asec carries the "spine" role; every other
# source listed here carries the "donor" role.
sources:
cps_asec: { dataset: cps_asec_2025_calendar_2024, role: spine }
puf: { dataset: puf_2024, role: donor }
acs: { dataset: acs_2024, role: donor }
sipp: { dataset: sipp_2023, role: donor }
scf: { dataset: scf_2022, role: donor }

# Spine definition: based on the cps_asec source, using the clone method with
# a fixed seed. Two halves are declared: cps_keep keeps everything, while
# synthetic_puf is stripped down to demographics.
spine:
base: cps_asec
method: clone
clone: { seed: 20260529 }
halves:
- { name: cps_keep, keep: all }
- { name: synthetic_puf, strip_to: [demographics] }

# Imputation steps. This first step targets the synthetic_puf half, draws from
# the puf source, conditions on demographics only, and sets synthesize: true.
imputation:
- onto: synthetic_puf
from: puf
vars:
- employment_income
- partnership_s_corp_income
- social_security
- taxable_pension_income
- interest_deduction
- tax_exempt_pension_income
- long_term_capital_gains
- unreimbursed_business_employee_expenses
- pre_tax_contributions
- taxable_ira_distributions
- self_employment_income
- w2_wages_from_qualified_business
- unadjusted_basis_qualified_property
- business_is_sstb
- sstb_self_employment_income_before_lsr
- sstb_self_employment_income
- sstb_self_employment_income_would_be_qualified
- sstb_w2_wages_from_qualified_business
- sstb_unadjusted_basis_qualified_property
- short_term_capital_gains
- qualified_dividend_income
- charitable_cash_donations
- self_employed_pension_contribution_ald
- unrecaptured_section_1250_gain
- taxable_unemployment_compensation
- taxable_interest_income
- domestic_production_ald
- self_employed_health_insurance_ald
- rental_income
- non_qualified_dividend_income
- cdcc_relevant_expenses
- tax_exempt_interest_income
- salt_refund_income
- foreign_tax_credit
- estate_income
- charitable_non_cash_donations
- american_opportunity_credit
- miscellaneous_income
- alimony_expense
- farm_income
- partnership_se_income
- alimony_income
- health_savings_account_ald
- non_sch_d_capital_gains
- general_business_credit
- energy_efficient_home_improvement_credit
- traditional_ira_contributions
- amt_foreign_tax_credit
- excess_withheld_payroll_tax
- savers_credit
- student_loan_interest
- investment_income_elected_form_4952
- early_withdrawal_penalty
- prior_year_minimum_tax_credit
- farm_rent_income
- qualified_tuition_expenses
- educator_expense
- long_term_capital_gains_on_collectibles
- other_credits
- casualty_loss
- unreported_payroll_tax
- recapture_of_investment_credit
- deductible_mortgage_interest
- home_mortgage_interest
- investment_interest_expense
- other_health_insurance_premiums
- qualified_reit_and_ptp_income
- qualified_bdc_income
- farm_operations_income
- estate_income_would_be_qualified
- farm_operations_income_would_be_qualified
- farm_rent_income_would_be_qualified
- partnership_s_corp_income_would_be_qualified
- rental_income_would_be_qualified
- self_employment_income_would_be_qualified
- weeks_unemployed
condition_on: [demographics]
order: spine_first
synthesize: true

# Imputation step targeting the cps_keep half from the puf source, conditioned
# on demographics. It lists the same variables as the synthetic_puf step and
# does not set a synthesize flag.
- onto: cps_keep
from: puf
vars:
- employment_income
- partnership_s_corp_income
- social_security
- taxable_pension_income
- interest_deduction
- tax_exempt_pension_income
- long_term_capital_gains
- unreimbursed_business_employee_expenses
- pre_tax_contributions
- taxable_ira_distributions
- self_employment_income
- w2_wages_from_qualified_business
- unadjusted_basis_qualified_property
- business_is_sstb
- sstb_self_employment_income_before_lsr
- sstb_self_employment_income
- sstb_self_employment_income_would_be_qualified
- sstb_w2_wages_from_qualified_business
- sstb_unadjusted_basis_qualified_property
- short_term_capital_gains
- qualified_dividend_income
- charitable_cash_donations
- self_employed_pension_contribution_ald
- unrecaptured_section_1250_gain
- taxable_unemployment_compensation
- taxable_interest_income
- domestic_production_ald
- self_employed_health_insurance_ald
- rental_income
- non_qualified_dividend_income
- cdcc_relevant_expenses
- tax_exempt_interest_income
- salt_refund_income
- foreign_tax_credit
- estate_income
- charitable_non_cash_donations
- american_opportunity_credit
- miscellaneous_income
- alimony_expense
- farm_income
- partnership_se_income
- alimony_income
- health_savings_account_ald
- non_sch_d_capital_gains
- general_business_credit
- energy_efficient_home_improvement_credit
- traditional_ira_contributions
- amt_foreign_tax_credit
- excess_withheld_payroll_tax
- savers_credit
- student_loan_interest
- investment_income_elected_form_4952
- early_withdrawal_penalty
- prior_year_minimum_tax_credit
- farm_rent_income
- qualified_tuition_expenses
- educator_expense
- long_term_capital_gains_on_collectibles
- other_credits
- casualty_loss
- unreported_payroll_tax
- recapture_of_investment_credit
- deductible_mortgage_interest
- home_mortgage_interest
- investment_interest_expense
- other_health_insurance_premiums
- qualified_reit_and_ptp_income
- qualified_bdc_income
- farm_operations_income
- estate_income_would_be_qualified
- farm_operations_income_would_be_qualified
- farm_rent_income_would_be_qualified
- partnership_s_corp_income_would_be_qualified
- rental_income_would_be_qualified
- self_employment_income_would_be_qualified
- weeks_unemployed
condition_on: [demographics]
order: spine_first

# Second imputation step targeting the cps_keep half from the puf source,
# conditioned on demographics, this time with synthesize: true. Its variable
# list is shorter than the other steps' (for example, employment_income is
# not listed here).
- onto: cps_keep
from: puf
vars:
- partnership_s_corp_income
- interest_deduction
- unreimbursed_business_employee_expenses
- pre_tax_contributions
- w2_wages_from_qualified_business
- unadjusted_basis_qualified_property
- business_is_sstb
- sstb_self_employment_income_before_lsr
- sstb_self_employment_income
- sstb_self_employment_income_would_be_qualified
- sstb_w2_wages_from_qualified_business
- sstb_unadjusted_basis_qualified_property
- charitable_cash_donations
- self_employed_pension_contribution_ald
- unrecaptured_section_1250_gain
- taxable_unemployment_compensation
- domestic_production_ald
- self_employed_health_insurance_ald
- cdcc_relevant_expenses
- salt_refund_income
- foreign_tax_credit
- estate_income
- charitable_non_cash_donations
- american_opportunity_credit
- miscellaneous_income
- alimony_expense
- health_savings_account_ald
- non_sch_d_capital_gains
- general_business_credit
- energy_efficient_home_improvement_credit
- amt_foreign_tax_credit
- excess_withheld_payroll_tax
- savers_credit
- student_loan_interest
- investment_income_elected_form_4952
- early_withdrawal_penalty
- prior_year_minimum_tax_credit
- farm_rent_income
- qualified_tuition_expenses
- educator_expense
- long_term_capital_gains_on_collectibles
- other_credits
- casualty_loss
- unreported_payroll_tax
- recapture_of_investment_credit
- deductible_mortgage_interest
- home_mortgage_interest
- investment_interest_expense
- other_health_insurance_premiums
- qualified_reit_and_ptp_income
- qualified_bdc_income
- farm_operations_income
- estate_income_would_be_qualified
- farm_operations_income_would_be_qualified
- farm_rent_income_would_be_qualified
- partnership_s_corp_income_would_be_qualified
- rental_income_would_be_qualified
- self_employment_income_would_be_qualified
condition_on: [demographics]
order: spine_first
synthesize: true

# Target selection under the "arch" key: country, model year, and two
# profile names (a general target profile and a calibration target profile).
targets:
arch:
country: us
model_year: 2024
target_profile: pe_native_broad
calibration_target_profile: pe_native_broad_source_backed

# Calibration settings: the named loss and the method identifier.
calibrate:
loss: pe_native_bucketed_huber_v1
method: apg
102 changes: 102 additions & 0 deletions tests/specs/test_us_2024_spec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
from __future__ import annotations

from importlib.resources import files
from pathlib import Path

from microplex.spec import DEMOGRAPHICS_TOKEN, ImputationOrder, SpineMethod, load_spec

from microplex_us.pipelines.us import (
PUF_SUPPORT_CLONE_IMPUTED_VARIABLES,
PUF_SUPPORT_CLONE_OVERRIDDEN_VARIABLES,
PUF_SUPPORT_CLONE_SPECIAL_VARIABLES,
)
from microplex_us.variables import PE_STYLE_PUF_IRS_DEMOGRAPHIC_PREDICTORS

# Path to the us-2024.yaml resource shipped inside the microplex_us.specs package.
SPEC_PATH = Path(str(files("microplex_us.specs") / "us-2024.yaml"))


def _spec():
    """Load the packaged US 2024 spec from ``SPEC_PATH`` via ``load_spec``."""
    loaded = load_spec(SPEC_PATH)
    return loaded


def test_us_2024_spec_loads_and_names_release_surface() -> None:
    """Check the spec's metadata, source names, target profiles, and calibration settings."""
    loaded = _spec()

    # Metadata block.
    meta = loaded.meta
    assert meta.country == "us"
    assert meta.model_year == 2024
    assert meta.policyengine_model == "policyengine-us"

    # Declared data sources.
    sources = loaded.sources
    assert sources["cps_asec"].dataset == "cps_asec_2025_calendar_2024"
    assert sources["puf"].dataset == "puf_2024"
    assert set(sources) == {"cps_asec", "puf", "acs", "sipp", "scf"}

    # Target selection.
    targets = loaded.targets
    assert targets is not None
    arch = targets.arch
    assert arch.country == "us"
    assert arch.model_year == 2024
    assert arch.target_profile == "pe_native_broad"
    expected_calibration_profile = "pe_native_broad_source_backed"
    assert arch.resolved_calibration_target_profile == expected_calibration_profile

    # Calibration settings.
    calibrate = loaded.calibrate
    assert calibrate is not None
    assert calibrate.loss == "pe_native_bucketed_huber_v1"
    assert calibrate.method.value == "apg"


def test_us_2024_spec_declares_ecps_clone_spine() -> None:
    """Check the spine: cps_asec base, clone method, fixed seed, and both halves."""
    spine = _spec().spine

    assert spine.base == "cps_asec"
    assert spine.method is SpineMethod.CLONE
    assert spine.clone.seed == 20260529

    # The pass-through half keeps everything.
    passthrough = spine.passthrough_half
    assert passthrough.name == "cps_keep"
    assert passthrough.keep == "all"

    # The synthetic half is stripped to the demographics token only.
    synthetic = spine.synthetic_half
    assert synthetic.name == "synthetic_puf"
    assert synthetic.strip_to == [DEMOGRAPHICS_TOKEN]


def test_us_2024_spec_declares_demographic_only_puf_synthesis() -> None:
    """Check the three PUF imputation steps declared by the spec.

    The spec is expected to contain exactly three imputation entries, in
    this order: one onto ``synthetic_puf``, a fill onto ``cps_keep``, and an
    override onto ``cps_keep``. Each must draw from ``puf``, condition on the
    demographics token only, and use spine-first ordering.
    """
    spec = _spec()
    all_puf_vars = list(
        PUF_SUPPORT_CLONE_IMPUTED_VARIABLES + PUF_SUPPORT_CLONE_SPECIAL_VARIABLES
    )

    # Unpacking fails loudly if the spec stops declaring exactly three steps.
    synthetic, cps_fill, cps_override = spec.imputation

    assert synthetic.onto == "synthetic_puf"
    assert synthetic.from_ == "puf"
    assert synthetic.vars == all_puf_vars
    assert synthetic.condition_on == [DEMOGRAPHICS_TOKEN]
    assert synthetic.order is ImputationOrder.SPINE_FIRST
    assert synthetic.synthesize is True

    assert cps_fill.onto == "cps_keep"
    assert cps_fill.from_ == "puf"
    assert cps_fill.vars == all_puf_vars
    assert cps_fill.condition_on == [DEMOGRAPHICS_TOKEN]
    # The YAML sets `order: spine_first` on this step too; pin it like step one.
    assert cps_fill.order is ImputationOrder.SPINE_FIRST
    assert cps_fill.synthesize is False

    assert cps_override.onto == "cps_keep"
    assert cps_override.from_ == "puf"
    assert cps_override.vars == list(PUF_SUPPORT_CLONE_OVERRIDDEN_VARIABLES)
    assert cps_override.condition_on == [DEMOGRAPHICS_TOKEN]
    # Same for the override step, which also declares `order: spine_first`.
    assert cps_override.order is ImputationOrder.SPINE_FIRST
    assert cps_override.synthesize is True

    # The override list must stay a subset of the imputed list.
    assert set(PUF_SUPPORT_CLONE_OVERRIDDEN_VARIABLES).issubset(
        PUF_SUPPORT_CLONE_IMPUTED_VARIABLES
    )
    # employment_income is imputed, never overridden, and never a predictor.
    assert "employment_income" in synthetic.vars
    assert "employment_income" not in cps_override.vars
    assert "employment_income" not in synthetic.condition_on
    # Pin the exact demographic predictor tuple.
    assert tuple(PE_STYLE_PUF_IRS_DEMOGRAPHIC_PREDICTORS) == (
        "age",
        "is_male",
        "tax_unit_is_joint",
        "tax_unit_count_dependents",
        "is_tax_unit_head",
        "is_tax_unit_spouse",
        "is_tax_unit_dependent",
    )


def test_us_2024_spec_keeps_forbes_out_of_replication_baseline() -> None:
    """Check that the spec file's text never mentions "forbes" (case-insensitive)."""
    spec_text = SPEC_PATH.read_text(encoding="utf-8")
    lowered = spec_text.lower()
    assert "forbes" not in lowered
Loading
Loading