diff --git a/backends/arm/_passes/__init__.py b/backends/arm/_passes/__init__.py index 52f14d326dd..d991c135e6c 100644 --- a/backends/arm/_passes/__init__.py +++ b/backends/arm/_passes/__init__.py @@ -5,6 +5,7 @@ from . import arm_pass_utils # noqa +from . import fb from .arm_pass import ArmPass # noqa # usort: skip from .annotate_decomposed_matmul import AnnotateDecomposedMatmulPass # noqa from .annotate_output_dim_order_pass import AnnotateOutputDimOrderPass # noqa @@ -80,6 +81,7 @@ ) from .decompose_var_pass import DecomposeVarPass # noqa from .decorate_fp32_to_int32_casting_pass import DecorateFp32toInt32CastingPass # noqa +from .fb.reciprocal_dual_lut_pass import ReciprocalDualLutPass # noqa from .fold_qdq_with_annotated_qparams_pass import ( # noqa FoldAndAnnotateQParamsPass, QuantizeClampArgumentsPass, diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py index a3eafa5aac1..487beb736da 100644 --- a/backends/arm/_passes/arm_pass_manager.py +++ b/backends/arm/_passes/arm_pass_manager.py @@ -107,6 +107,7 @@ RewriteConvPass, RewriteMatmulPass, RewriteUpsamplePass, + ReciprocalDualLutPass, ScalarsToAttributePass, SizeAdjustInputPass, ToTosaMemoryFormatPass, @@ -329,6 +330,8 @@ def _tosa_pipeline( FuseViewCopyTransformPass(), DecomposeConvWithInt16ActivationPass(), DecomposeSumPass(), + # ReciprocalDualLutPass runs right before InsertTableOpsPass; re-enabled now that LUT generation is fixed. + ReciprocalDualLutPass(exported_program), InsertTableOpsPass(exported_program), ] ) diff --git a/backends/arm/test/ops/test_rsqrt.py b/backends/arm/test/ops/test_rsqrt.py index b6a8ff687b7..1f444c91e6c 100644 --- a/backends/arm/test/ops/test_rsqrt.py +++ b/backends/arm/test/ops/test_rsqrt.py @@ -124,9 +124,6 @@ def test_rsqrt_tosa_INT_a16w8(test_tensor: torch.Tensor): @common.parametrize("test_tensor", Rsqrt.test_parameters) @common.XfailIfNoCorstone300 -@pytest.mark.xfail( - reason="MLETORCH-707: AssertionError: Output 0 does not match reference output." 
-) def test_rsqrt_16a8w_u55_INT(test_tensor: torch.Tensor): """Test rsqrt operation with int16 I/O quantization for U55""" # Use wider tolerances for int16 I/O quantization on U55