diff --git a/src/winml/modelkit/commands/build.py b/src/winml/modelkit/commands/build.py
index 7266620cb..94de50065 100644
--- a/src/winml/modelkit/commands/build.py
+++ b/src/winml/modelkit/commands/build.py
@@ -494,6 +494,9 @@ def _validate_loader_tasks_for_model(
     help="Maximum autoconf re-optimization rounds (default: 3). --no-analyze sets this to 0.",
 )
 @cli_utils.allow_unsupported_nodes_option()
+@cli_utils.precision_option(
+    optional_message="When fp16, applies FP16 conversion during optimization."
+)
 @cli_utils.trust_remote_code_option(
     optional_message="Trust remote code for custom model architectures (e.g., Mu2)."
 )
@@ -514,6 +517,7 @@ def build(
     analyze: bool,
     max_optim_iterations: int | None,
     allow_unsupported_nodes: bool,
+    precision: str | None,
     trust_remote_code: bool,
     verbose: int,
     quiet: bool,
@@ -674,6 +678,8 @@ def _patch_device(cfg: WinMLBuildConfig) -> None:
         # on the key being present, matching the module-mode path which passes
         # allow_unsupported_nodes explicitly regardless of its value.
         extra_kwargs["allow_unsupported_nodes"] = allow_unsupported_nodes
+        if precision == "fp16":
+            extra_kwargs["precision"] = "fp16"
 
         if isinstance(config_or_configs, list):
             # ---- MODULE MODE: array config, one build per submodule ----
@@ -1119,6 +1125,45 @@ def _on_reoptimize(autoconf_dict: dict) -> None:
     return current_path, opt_elapsed
 
 
+def _run_fp16_stage(
+    *,
+    model_path: Path,
+    stage_timings: list[tuple[str, float | None]],
+) -> Path:
+    """Run the FP16 conversion stage on an ONNX model file.
+
+    Loads the model, converts it with keep_io_types=True, and saves the result
+    back to the same path; the recorded time spans load, conversion and save.
+
+    Args:
+        model_path: Path to the ONNX model to convert (overwritten on success).
+        stage_timings: List that receives one ("FP16", elapsed) entry.
+
+    Returns:
+        The same model_path that was passed in.
+    """
+    from ..onnx import load_onnx, save_onnx
+    from ..optim.fp16 import convert_to_fp16
+    from ..utils.console import StageLive
+
+    with StageLive("fp16", console) as sl:
+        sl.set_status("Converting to FP16...")
+        t0 = time.monotonic()
+
+        model = load_onnx(model_path)
+        model = convert_to_fp16(model, keep_io_types=True)
+        save_onnx(model, model_path)  # same path as the input: overwrites it
+
+        elapsed = time.monotonic() - t0
+        sl.set_done(elapsed)
+        sl.detail("[dim]I/O types preserved as FP32[/dim]")
+        sl.artifact(str(model_path), _safe_size(model_path))
+        sl.blank()
+
+    stage_timings.append(("FP16", elapsed))
+    return model_path
+
+
 def _run_quantize_stage(
     *,
     config: WinMLBuildConfig,
@@ -1378,6 +1423,8 @@ def _name(base: str) -> str:
 
     stage_timings.append(("Export", _export_elapsed))
 
+    _precision = extra_kwargs.pop("precision", None)
+
     # ── Optimize stage ───────────────────────────────────────────
     current_path, _ = _run_optimize_stage(
         config=config,
@@ -1395,13 +1442,24 @@ def _name(base: str) -> str:
     # Persist config after autoconf
     config_path.write_text(json.dumps(config.to_dict(), indent=2))
 
-    # ── Quantize stage ───────────────────────────────────────────
-    current_path = _run_quantize_stage(
-        config=config,
-        current_path=current_path,
-        quantized_path=quantized_path,
-        stage_timings=stage_timings,
-    )
+    # ── FP16 conversion (when --precision fp16) ──────────────────
+    if _precision == "fp16":
+        current_path = _run_fp16_stage(
+            model_path=current_path,
+            stage_timings=stage_timings,
+        )
+
+    # ── Quantize stage (skipped when FP16 — incompatible) ────────
+    if _precision == "fp16" and config.quant is not None:
+        print_stage_skip(console, "quantize", "(incompatible with --precision fp16)")
+        stage_timings.append(("Quantize", None))
+    else:
+        current_path = _run_quantize_stage(
+            config=config,
+            current_path=current_path,
+            quantized_path=quantized_path,
+            stage_timings=stage_timings,
+        )
 
     # ── Compile stage ────────────────────────────────────────────
     current_path = _run_compile_stage(
@@ -1437,6 +1495,7 @@ def _build_onnx_pipeline(
 
     max_iters: int = extra_kwargs.pop("hack_max_optim_iterations", 3)
     allow_unsupported_nodes: bool = extra_kwargs.pop("allow_unsupported_nodes", False)
+    _precision: str | None = extra_kwargs.pop("precision", None)
 
     # ── Validate + setup ─────────────────────────────────────────
     if not onnx_path.exists():
@@ -1490,13 +1549,24 @@ def _build_onnx_pipeline(
 
     config_path.write_text(json.dumps(config.to_dict(), indent=2))
 
-    # ── Quantize stage ───────────────────────────────────────────
-    current_path = _run_quantize_stage(
-        config=config,
-        current_path=current_path,
-        quantized_path=quantized_path,
-        stage_timings=stage_timings,
-    )
+    # ── FP16 conversion (when --precision fp16) ──────────────────
+    if _precision == "fp16":
+        current_path = _run_fp16_stage(
+            model_path=current_path,
+            stage_timings=stage_timings,
+        )
+
+    # ── Quantize stage (skipped when FP16 — incompatible) ────────
+    if _precision == "fp16" and config.quant is not None:
+        print_stage_skip(console, "quantize", "(incompatible with --precision fp16)")
+        stage_timings.append(("Quantize", None))
+    else:
+        current_path = _run_quantize_stage(
+            config=config,
+            current_path=current_path,
+            quantized_path=quantized_path,
+            stage_timings=stage_timings,
+        )
 
     # ── Compile stage ────────────────────────────────────────────
     current_path = _run_compile_stage(
diff --git a/src/winml/modelkit/commands/export.py b/src/winml/modelkit/commands/export.py
index 4cae34a86..7527b039e 100644
--- a/src/winml/modelkit/commands/export.py
+++ b/src/winml/modelkit/commands/export.py
@@ -130,6 +130,7 @@ def _delete_onnx_with_external_data(onnx_path: Path) -> None:
     help='JSON with shape overrides (e.g., {"sequence_length": 2048, "height": 640}).',
 )
 @cli_utils.build_config_option()
+@cli_utils.precision_option(optional_message="When fp16, applies FP16 conversion after export.")
 @cli_utils.verbosity_options()
 @click.pass_context
 def export(
@@ -148,6 +149,7 @@ def export(
     export_config: Path | None,
     shape_config: Path | None,
     config_file: Path | None,
+    precision: str | None,
 ) -> None:
     r"""Export HuggingFace model to ONNX format with HTP.
 
@@ -420,6 +422,17 @@ def export(
         )
         logger.debug("Export stats: %s", export_stats)
 
+        # Post-export FP16 conversion when --precision fp16 is specified
+        if precision == "fp16":
+            console.print("[bold]Converting to FP16...[/bold]")
+            from ..onnx import load_onnx, save_onnx
+            from ..optim.fp16 import convert_to_fp16
+
+            fp16_model = load_onnx(output_path)
+            fp16_model = convert_to_fp16(fp16_model, keep_io_types=True)
+            save_onnx(fp16_model, output_path)
+            console.print("[dim]FP16 conversion applied (I/O kept as FP32)[/dim]")
+
         # TODO: re-enable post-export optimization (shape inference, constant folding)
         # Disabled: needs validation that optimize_onnx preserves HTP hierarchy tags.
         # from ..optim.api import optimize_onnx
diff --git a/src/winml/modelkit/commands/optimize.py b/src/winml/modelkit/commands/optimize.py
index 287f9a423..355ef2a2f 100644
--- a/src/winml/modelkit/commands/optimize.py
+++ b/src/winml/modelkit/commands/optimize.py
@@ -180,6 +180,21 @@ def capability_options(func: F) -> F:
     default=None,
     help="Configuration file (YAML/JSON)",
 )
+@cli_utils.precision_option(optional_message="Applies FP16 conversion after graph optimization.")
+@click.option(
+    "--fp16-keep-io-types/--no-fp16-keep-io-types",
+    "fp16_keep_io_types",
+    default=True,
+    show_default=True,
+    help="Keep model I/O as FP32 when --precision fp16 (insert Cast at boundary)",
+)
+@click.option(
+    "--fp16-op-block-list",
+    "fp16_op_block_list",
+    type=str,
+    default=None,
+    help="Comma-separated list of op types to keep in FP32 (e.g., LayerNorm,Softmax)",
+)
 @cli_utils.verbosity_options()
 @capability_options
 @click.pass_context  # type: ignore[arg-type]  # capability_options widens the signature; click stubs want positional-only ctx but we keep it keyword-callable for back-compat
@@ -190,6 +205,9 @@ def optimize(
     model: Path | None,
     output: Path | None,
     config: Path | None,
+    precision: str | None,
+    fp16_keep_io_types: bool,
+    fp16_op_block_list: str | None,
     verbose: int,
     quiet: bool,
     **kwargs: Any,
@@ -224,6 +242,17 @@ def optimize(
         # Basic optimization with GELU fusion
         winml optimize -m model.onnx -o model_opt.onnx --enable-gelu-fusion
 
+        # Convert model to FP16 (after graph optimization)
+        winml optimize -m model.onnx -o fp16.onnx --precision fp16
+
+        # FP16 without preserving I/O types
+        winml optimize -m model.onnx -o fp16.onnx --precision fp16 \
+            --no-fp16-keep-io-types
+
+        # FP16 with specific ops kept in FP32
+        winml optimize -m model.onnx -o fp16.onnx --precision fp16 \
+            --fp16-op-block-list LayerNorm,Softmax
+
         # Use config file
         winml optimize -m model.onnx -c config.toml
     """
@@ -406,6 +435,22 @@ def optimize(
         optimizer = Optimizer()
         optimized_model = optimizer.optimize(onnx_model, **optimizer_kwargs)
 
+        # Post-optimization FP16 conversion (command-layer, not a pipe)
+        if precision == "fp16":
+            from ..optim.fp16 import convert_to_fp16
+
+            console.print("[bold]Converting to FP16...[/bold]")
+            op_block = (
+                [s.strip() for s in fp16_op_block_list.split(",") if s.strip()]
+                if fp16_op_block_list
+                else None
+            )
+            optimized_model = convert_to_fp16(
+                optimized_model,
+                keep_io_types=fp16_keep_io_types,
+                op_block_list=op_block,
+            )
+
         console.print("[bold]Saving optimized model...[/bold]")
         save_onnx(optimized_model, output)
 
diff --git a/src/winml/modelkit/optim/__init__.py b/src/winml/modelkit/optim/__init__.py
index dc1f3a983..dd0196219 100644
--- a/src/winml/modelkit/optim/__init__.py
+++ b/src/winml/modelkit/optim/__init__.py
@@ -27,6 +27,7 @@
 from .api import optimize_onnx
 from .config import WinMLOptimizationConfig
 from .errors import ConfigurationError, ModelValidationError, OptimizationError
+from .fp16 import convert_to_fp16
 from .optimizer import Optimizer
 from .registry import (
     BoolCapability,
@@ -48,6 +49,7 @@
     "Optimizer",
     "WinMLOptimizationConfig",
     "auto_enable_dependencies",
+    "convert_to_fp16",
     "optimize_onnx",
     "validate",
     "validate_dependencies",
diff --git a/src/winml/modelkit/optim/fp16.py b/src/winml/modelkit/optim/fp16.py
new file mode 100644
index 000000000..0c0b373bf
--- /dev/null
+++ b/src/winml/modelkit/optim/fp16.py
@@ -0,0 +1,91 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+"""FP16 conversion utility for ONNX models.
+
+Provides a single entry point for FP32→FP16 model conversion, used by
+all CLI commands (optimize, build, export) at the command layer.
+
+This is NOT an optimizer pipe — FP16 is a precision transformation (like
+quantization), not a graph optimization. It runs after optimization and
+before quantization in the build pipeline.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+
+if TYPE_CHECKING:
+    import onnx
+
+logger = logging.getLogger(__name__)
+
+
+def convert_to_fp16(
+    model: onnx.ModelProto,
+    *,
+    keep_io_types: bool = True,
+    op_block_list: list[str] | None = None,
+) -> onnx.ModelProto:
+    """Convert an ONNX model from FP32 to FP16 precision.
+
+    Uses onnxruntime.transformers.float16.convert_float_to_float16 internally.
+    No new dependencies — ORT is already a project dependency.
+
+    Note: always use the return value; ORT may hand back a different ModelProto.
+
+    Args:
+        model: Input ONNX ModelProto; treat it as consumed after the call.
+        keep_io_types: If True, preserve FP32 model inputs/outputs by inserting
+            Cast nodes at boundaries. Recommended for CPU-safe inference.
+        op_block_list: ONNX op types to keep in FP32 (e.g., ["LayerNormalization"]).
+            When None, ORT uses its DEFAULT_OP_BLOCK_LIST which includes ops
+            known to be numerically unsafe in FP16 (e.g., TopK, CumSum, etc.).
+
+    Returns:
+        The converted model, or ``model`` itself when it is already FP16.
+    """
+    from onnx import TensorProto
+    from onnxruntime.transformers.float16 import convert_float_to_float16
+
+    # Skip if model is already FP16 (check floating-point initializer dtypes)
+    wide_types = {TensorProto.FLOAT, TensorProto.DOUBLE, TensorProto.BFLOAT16}
+    float_types = wide_types | {TensorProto.FLOAT16}
+    # Non-float initializers (e.g. INT64 shape tensors) do not affect the decision.
+    float_inits = [t for t in model.graph.initializer if t.data_type in float_types]
+    if float_inits and all(t.data_type == TensorProto.FLOAT16 for t in float_inits):
+        logger.info("Model is already FP16 — skipping conversion.")
+        return model
+
+    original_nodes = len(model.graph.node)
+
+    logger.info("Converting model to FP16...")
+    if keep_io_types:
+        logger.info("  Keeping I/O types as FP32")
+    if op_block_list:
+        logger.info("  Keeping ops in FP32: %s", op_block_list)
+
+    converted = convert_float_to_float16(
+        model,
+        keep_io_types=keep_io_types,
+        op_block_list=op_block_list,
+    )
+
+    # ORT's converter appends the Cast nodes it inserts (graph I/O casts for
+    # keep_io_types and, per ORT's float16.py, casts around blocked ops) to the
+    # end of the node list, which breaks topological ordering. Always re-sort
+    # with ORT's own utility; sorting an already-ordered graph is safe.
+    from onnxruntime.transformers.onnx_model import OnnxModel
+
+    OnnxModel.graph_topological_sort(converted.graph)
+
+    converted_nodes = len(converted.graph.node)
+    if converted_nodes != original_nodes:
+        logger.info("FP16 conversion complete: %d -> %d nodes", original_nodes, converted_nodes)
+    else:
+        logger.info("FP16 conversion complete: %d nodes", converted_nodes)
+
+    return converted
diff --git a/src/winml/modelkit/utils/cli.py b/src/winml/modelkit/utils/cli.py
index 8f50fd006..6f910a512 100644
--- a/src/winml/modelkit/utils/cli.py
+++ b/src/winml/modelkit/utils/cli.py
@@ -396,6 +396,34 @@ def allow_unsupported_nodes_option(optional_message: str | None = None) -> Calla
     )
 
 
+def precision_option(
+    required: bool = False,
+    optional_message: str | None = None,
+) -> Callable[[F], F]:
+    """Build the shared ``--precision`` Click option decorator.
+
+    Accepted values are fp32 and fp16; an omitted flag reaches the command as None.
+
+    Args:
+        required: Forwarded to Click; when True the flag must be supplied.
+        optional_message: Command-specific text appended to the base help string.
+
+    Returns:
+        A decorator that attaches the option to a Click command function.
+    """
+    base_help = "Model precision: fp32 (default) or fp16."
+    # Append the command-specific guidance only when the caller supplied some.
+    full_help = f"{base_help} {optional_message}" if optional_message else base_help
+
+    return click.option(
+        "--precision",
+        type=click.Choice(["fp32", "fp16"]),
+        default=None,
+        required=required,
+        help=full_help,
+    )
+
+
 def load_build_config(config_path: Path) -> tuple[WinMLBuildConfig, dict]:
     """Load a WinMLBuildConfig from a JSON file.
 
diff --git a/tests/unit/optim/test_fp16.py b/tests/unit/optim/test_fp16.py
new file mode 100644
index 000000000..769ab323a
--- /dev/null
+++ b/tests/unit/optim/test_fp16.py
@@ -0,0 +1,148 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+"""FP16 conversion utility tests.
+
+Tests for winml.modelkit.optim.fp16.convert_to_fp16 which converts
+FP32 ONNX models to FP16 precision.
+
+Following Cardinal Rules:
+- CARDINAL RULE #1: No hardcoded model architectures
+- CARDINAL RULE #2: All tests use pytest with code-generated results
+- CARDINAL RULE #3: Tests must run and pass
+"""
+
+from __future__ import annotations
+
+import numpy as np
+import onnx
+from onnx import TensorProto, numpy_helper
+
+from winml.modelkit.optim import convert_to_fp16
+
+
+# =============================================================================
+# HELPERS
+# =============================================================================
+
+
+def _build_simple_fp32_model() -> onnx.ModelProto:
+    """Return a one-node FP32 graph computing ``out = x + weight`` (shape [1, 4])."""
+    weight = numpy_helper.from_array(np.array([[1.0, 2.0, 3.0, 4.0]], dtype=np.float32), "weight")
+    inp = onnx.helper.make_tensor_value_info("x", TensorProto.FLOAT, [1, 4])
+    outp = onnx.helper.make_tensor_value_info("out", TensorProto.FLOAT, [1, 4])
+    nodes = [onnx.helper.make_node("Add", ["x", "weight"], ["out"], name="add")]
+    graph = onnx.helper.make_graph(nodes, "simple", [inp], [outp], [weight])
+    return onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", 17)])
+
+
+def _build_multi_op_fp32_model() -> onnx.ModelProto:
+    """Return a two-node FP32 graph computing ``out = Relu(x + weight)``."""
+    weight = numpy_helper.from_array(np.array([[1.0, 2.0, 3.0, 4.0]], dtype=np.float32), "weight")
+    inp = onnx.helper.make_tensor_value_info("x", TensorProto.FLOAT, [1, 4])
+    outp = onnx.helper.make_tensor_value_info("out", TensorProto.FLOAT, [1, 4])
+    add_node = onnx.helper.make_node("Add", ["x", "weight"], ["add_out"], name="add")
+    relu_node = onnx.helper.make_node("Relu", ["add_out"], ["out"], name="relu")
+    graph = onnx.helper.make_graph([add_node, relu_node], "multi_op", [inp], [outp], [weight])
+    return onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", 17)])
+
+
+# =============================================================================
+# CONVERT_TO_FP16 TESTS
+# =============================================================================
+
+
+class TestConvertToFP16:
+    """Behavioral tests for convert_to_fp16 on small hand-built ONNX graphs."""
+
+    def test_converts_weights_to_fp16(self) -> None:
+        """FP16 conversion converts float32 initializers to float16."""
+        model = _build_simple_fp32_model()
+        result = convert_to_fp16(model)
+
+        has_fp16 = any(init.data_type == TensorProto.FLOAT16 for init in result.graph.initializer)
+        assert has_fp16, "Expected at least one FP16 initializer after conversion"
+
+    def test_default_keeps_io_types(self) -> None:
+        """Omitting keep_io_types (default True) preserves FP32 model I/O."""
+        model = _build_simple_fp32_model()
+        result = convert_to_fp16(model)
+
+        for inp in result.graph.input:
+            assert inp.type.tensor_type.elem_type == TensorProto.FLOAT
+        for outp in result.graph.output:
+            assert outp.type.tensor_type.elem_type == TensorProto.FLOAT
+
+    def test_keep_io_types_false_converts_io(self) -> None:
+        """With keep_io_types=False, model I/O becomes FP16."""
+        model = _build_simple_fp32_model()
+        result = convert_to_fp16(model, keep_io_types=False)
+
+        for inp in result.graph.input:
+            assert inp.type.tensor_type.elem_type == TensorProto.FLOAT16
+        for outp in result.graph.output:
+            assert outp.type.tensor_type.elem_type == TensorProto.FLOAT16
+
+    def test_preserves_model_structure(self) -> None:
+        """FP16 conversion preserves graph structure (node count diff ≤ 2)."""
+        model = _build_multi_op_fp32_model()
+        original_count = len(model.graph.node)
+        result = convert_to_fp16(model, keep_io_types=True)
+        converted_count = len(result.graph.node)
+
+        assert converted_count - original_count <= 2, (
+            f"Node count changed from {original_count} to {converted_count}, "
+            f"difference {converted_count - original_count} exceeds threshold of 2"
+        )
+
+    def test_op_block_list_keeps_ops_in_fp32(self) -> None:
+        """Blocked ops get Cast wrappers; keep_io_types=False rules out I/O casts."""
+        model = _build_multi_op_fp32_model()
+        result = convert_to_fp16(model, keep_io_types=False, op_block_list=["Relu"])
+
+        op_types = [n.op_type for n in result.graph.node]
+        assert "Cast" in op_types, "Expected Cast nodes for blocked ops"
+
+    def test_none_op_block_list_uses_ort_defaults(self) -> None:
+        """When op_block_list is None, ORT uses its DEFAULT_OP_BLOCK_LIST."""
+        model = _build_simple_fp32_model()
+        # Add is not in ORT's default block list, so the weight must be converted
+        result = convert_to_fp16(model, op_block_list=None)
+        assert all(t.data_type == TensorProto.FLOAT16 for t in result.graph.initializer)
+
+    def test_skips_already_fp16_model(self) -> None:
+        """If all floating-point initializers are already FP16, conversion is skipped."""
+        # Build a model with FP16 initializers directly
+        x = onnx.helper.make_tensor_value_info("x", TensorProto.FLOAT16, [1, 4])
+        out = onnx.helper.make_tensor_value_info("out", TensorProto.FLOAT16, [1, 4])
+        weight_data = np.array([[1.0, 2.0, 3.0, 4.0]], dtype=np.float16)
+        weight = numpy_helper.from_array(weight_data, "weight")
+        add = onnx.helper.make_node("Add", ["x", "weight"], ["out"], name="add")
+        graph = onnx.helper.make_graph([add], "fp16_model", [x], [out], [weight])
+        model = onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", 17)])
+
+        original_nodes = len(model.graph.node)
+        result = convert_to_fp16(model)
+
+        # Should return the same model unchanged (no Cast nodes inserted)
+        assert len(result.graph.node) == original_nodes
+        assert result is model
+
+    def test_skips_fp16_model_with_int_initializers(self) -> None:
+        """FP16 model with non-float initializers (e.g. INT64 shapes) should still skip."""
+        x = onnx.helper.make_tensor_value_info("x", TensorProto.FLOAT16, [1, 4])
+        out = onnx.helper.make_tensor_value_info("out", TensorProto.FLOAT16, [1, 4])
+        weight_data = np.array([[1.0, 2.0, 3.0, 4.0]], dtype=np.float16)
+        weight = numpy_helper.from_array(weight_data, "weight")
+        # INT64 initializer (e.g., shape tensor) — should be ignored by skip logic
+        shape_tensor = numpy_helper.from_array(np.array([1, 4], dtype=np.int64), "shape")
+        add = onnx.helper.make_node("Add", ["x", "weight"], ["out"], name="add")
+        graph = onnx.helper.make_graph([add], "fp16_mixed", [x], [out], [weight, shape_tensor])
+        model = onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", 17)])
+
+        original_nodes = len(model.graph.node)
+        result = convert_to_fp16(model)
+
+        assert len(result.graph.node) == original_nodes
+        assert result is model
diff --git a/tests/unit/optim/test_optimizer.py b/tests/unit/optim/test_optimizer.py
index fe20d8401..1e51af2b7 100644
--- a/tests/unit/optim/test_optimizer.py
+++ b/tests/unit/optim/test_optimizer.py
@@ -698,7 +698,7 @@ def test_resolve_dependencies_method(self) -> None:
     def test_registered_pipes_count(self) -> None:
         """Verify the expected number of pipes are registered."""
         Optimizer._initialize_pipes()
-        # Currently: RewritePipe, ORTGraphPipe, ORTFusionPipe, SurgeryPipe
+        # Currently: ORTGraphPipe, RewritePipe, ORTFusionPipe, SurgeryPipe
         assert len(Optimizer.pipes) == 4
 
     def test_registered_pipe_names(self) -> None: