pytorch · SaoirseARM · Dec 4, 2025 · Dec 19, 2025
diff --git a/backends/cortex_m/ops/cortex_m_ops_common.h b/backends/cortex_m/ops/cortex_m_ops_common.h
@@ -74,26 +74,22 @@ inline void validate_cmsis_nn_tensor_requirements(
 }
 
 inline void validate_single_quant_params(
-    const Scalar& zero_point,
-    const Scalar& multiplier,
-    const Scalar& shift,
+    const int64_t zero_point,
+    const int64_t multiplier,
+    const int64_t shift,
     const char* param_name) {
-  int64_t zp_val = zero_point.to<int64_t>();
-  int64_t mult_val = multiplier.to<int64_t>();
-  int64_t shift_val = shift.to<int64_t>();
-
   ET_CHECK_MSG(
-      mult_val >= std::numeric_limits<int32_t>::min() &&
-          mult_val <= std::numeric_limits<int32_t>::max(),
-      "%s multiplier must be in int32 range [Value: %d]",
+      multiplier >= std::numeric_limits<int32_t>::min() &&
+          multiplier <= std::numeric_limits<int32_t>::max(),
+      "%s multiplier must be in int32 range [Value: %lld]",
       param_name,
-      mult_val);
+      static_cast<long long>(multiplier)); // 64-bit value: %lld, not %d
 
   ET_CHECK_MSG(
-      shift_val >= -31 && shift_val <= 31,
-      "%s shift must be in range [-31, 31] [Value: %d]",
+      shift >= -31 && shift <= 31,
+      "%s shift must be in range [-31, 31] [Value: %lld]",
       param_name,
-      shift_val);
+      static_cast<long long>(shift)); // 64-bit value: %lld, not %d
 }
 
 /**
@@ -108,15 +104,15 @@ inline void validate_single_quant_params(
  * Raises errors via ET_KERNEL_CHECK if any check fails.
  */
 inline void validate_quantization_params(
-    const Scalar& zero_point1,
-    const Scalar& multiplier1,
-    const Scalar& shift1,
-    const Scalar& zero_point2,
-    const Scalar& multiplier2,
-    const Scalar& shift2,
-    const Scalar& output_zero_point,
-    const Scalar& output_multiplier,
-    const Scalar& output_shift,
+    const int64_t zero_point1,
+    const int64_t multiplier1,
+    const int64_t shift1,
+    const int64_t zero_point2,
+    const int64_t multiplier2,
+    const int64_t shift2,
+    const int64_t output_zero_point,
+    const int64_t output_multiplier,
+    const int64_t output_shift,
     Tensor& output) {
   validate_single_quant_params(
       zero_point1, multiplier1, shift1, "Single quant Input1");
@@ -171,8 +167,8 @@ inline bool is_channel_broadcast(const Tensor& tensor1, const Tensor& tensor2) {
 // multiplier: Range {ARM_NN_Q31_MIN + 1, Q32_MAX}
 // shift     : Range {-31, 30}
 inline bool validate_per_channel_quant_params(
-    const int32_t* multipliers,
-    const int32_t* shifts,
+    const IntArrayRef multipliers,
+    const IntArrayRef shifts,
     int num_channels) {
   for (int i = 0; i < num_channels; ++i) {
     // Multiplier: {ARM_NN_Q31_MIN + 1, ARM_NN_Q31_MAX}

diff --git a/backends/cortex_m/ops/op_quantized_add.cpp b/backends/cortex_m/ops/op_quantized_add.cpp
@@ -21,16 +21,16 @@ using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
 Tensor& quantized_add_out(
     KernelRuntimeContext& context,
     const Tensor& input1_int8,
-    const Scalar& input1_zero_point,
-    const Scalar& input1_multiplier,
-    const Scalar& input1_shift,
+    const int64_t input1_zero_point,
+    const int64_t input1_multiplier,
+    const int64_t input1_shift,
     const Tensor& input2_int8,
-    const Scalar& input2_zero_point,
-    const Scalar& input2_multiplier,
-    const Scalar& input2_shift,
-    const Scalar& output_zero_point,
-    const Scalar& output_multiplier,
-    const Scalar& output_shift,
+    const int64_t input2_zero_point,
+    const int64_t input2_multiplier,
+    const int64_t input2_shift,
+    const int64_t output_zero_point,
+    const int64_t output_multiplier,
+    const int64_t output_shift,
     Tensor& out) {
   // Validate tensor types and dim order
   bool channel_broadcast = is_channel_broadcast(input1_int8, input2_int8);

diff --git a/backends/cortex_m/ops/op_quantized_avg_pool2d.cpp b/backends/cortex_m/ops/op_quantized_avg_pool2d.cpp
@@ -22,9 +22,9 @@ Tensor& quantized_avg_pool2d_out(
     const IntArrayRef kernel_size,
     const IntArrayRef stride,
     const IntArrayRef padding,
-    const Scalar& zero_point,
-    const Scalar& multiplier,
-    const Scalar& shift,
+    const int64_t zero_point,
+    const int64_t multiplier,
+    const int64_t shift,
     Tensor& out) {
   if (input.dim() != 4 || out.dim() != 4) {
     ET_LOG(Error, "quantized_avg_pool2d_out: tensors must be 4-D");

diff --git a/backends/cortex_m/ops/op_quantized_mul.cpp b/backends/cortex_m/ops/op_quantized_mul.cpp
@@ -26,12 +26,12 @@ using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
 Tensor& quantized_mul_out(
     KernelRuntimeContext& context,
     const Tensor& input1_int8,
-    const Scalar& input1_zero_point,
+    const int64_t input1_zero_point,
     const Tensor& input2_int8,
-    const Scalar& input2_zero_point,
-    const Scalar& output_zero_point,
-    const Scalar& output_multiplier,
-    const Scalar& output_shift,
+    const int64_t input2_zero_point,
+    const int64_t output_zero_point,
+    const int64_t output_multiplier,
+    const int64_t output_shift,
     Tensor& out) {
   // Validate tensor types and quantization parameters
 
@@ -44,8 +44,8 @@ Tensor& quantized_mul_out(
       /*require_channels_last=*/channel_broadcast,
       /*require_same_sizes=*/!channel_broadcast);
 
-  const Scalar kIdentityMultiplier(/*value=*/1);
-  const Scalar kZeroShift(/*value=*/0);
+  const int32_t kIdentityMultiplier(/*value=*/1);
+  const int32_t kZeroShift(/*value=*/0);
   validate_quantization_params(
       input1_zero_point,
       kIdentityMultiplier,

diff --git a/backends/cortex_m/ops/operators.py b/backends/cortex_m/ops/operators.py
@@ -114,17 +114,17 @@ def dequantize_per_tensor_impl(
 # Define the operator schema with multipliers and shifts (11 args)
 lib.define(
     "quantized_add("
-    "Tensor self, Scalar self_zero_point, Scalar self_multiplier, Scalar self_shift, "
-    "Tensor other, Scalar other_zero_point, Scalar other_multiplier, Scalar other_shift, "
-    "Scalar output_zero_point, Scalar output_multiplier, Scalar output_shift) -> Tensor"
+    "Tensor self, int self_zero_point, int self_multiplier, int self_shift, "
+    "Tensor other, int other_zero_point, int other_multiplier, int other_shift, "
+    "int output_zero_point, int output_multiplier, int output_shift) -> Tensor"
 )
 
 # Define the operator schema with multipliers and shifts (11 args + out tensor)
 lib.define(
     "quantized_add.out("
-    "Tensor self, Scalar self_zero_point, Scalar self_multiplier, Scalar self_shift, "
-    "Tensor other, Scalar other_zero_point, Scalar other_multiplier, Scalar other_shift, "
-    "Scalar output_zero_point, Scalar output_multiplier, Scalar output_shift, "
+    "Tensor self, int self_zero_point, int self_multiplier, int self_shift, "
+    "Tensor other, int other_zero_point, int other_multiplier, int other_shift, "
+    "int output_zero_point, int output_multiplier, int output_shift, "
     "*, Tensor(a!) out) -> Tensor(a!)"
 )
 
@@ -591,9 +591,9 @@ def quantized_conv2d_impl(
     "int[] kernel_size, "
     "int[] stride, "
     "int[] padding, "
-    "Scalar zero_point, "
-    "Scalar multiplier, "
-    "Scalar shift"
+    "int zero_point, "
+    "int multiplier, "
+    "int shift"
     ") -> Tensor"
 )
 lib.define(
@@ -602,9 +602,9 @@ def quantized_conv2d_impl(
     "int[] kernel_size, "
     "int[] stride, "
     "int[] padding, "
-    "Scalar zero_point, "
-    "Scalar multiplier, "
-    "Scalar shift, "
+    "int zero_point, "
+    "int multiplier, "
+    "int shift, "
     "*, Tensor(a!) out) -> Tensor(a!)"
 )
 

diff --git a/backends/cortex_m/ops/operators.yaml b/backends/cortex_m/ops/operators.yaml
@@ -17,13 +17,13 @@
     - arg_meta: null
       kernel_name: cortex_m::dequantize_per_tensor_out
 
-- func: cortex_m::quantized_add.out(Tensor self, Scalar self_zero_point, Scalar self_multiplier, Scalar self_shift, Tensor other, Scalar other_zero_point, Scalar other_multiplier, Scalar other_shift, Scalar output_zero_point, Scalar output_multiplier, Scalar output_shift, *, Tensor(a!) out) -> Tensor(a!)
+- func: cortex_m::quantized_add.out(Tensor self, int self_zero_point, int self_multiplier, int self_shift, Tensor other, int other_zero_point, int other_multiplier, int other_shift, int output_zero_point, int output_multiplier, int output_shift, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   kernels:
     - arg_meta: null
       kernel_name: cortex_m::quantized_add_out
 
-- func: cortex_m::quantized_mul.out(Tensor self, Scalar self_zero_point, Tensor other, Scalar other_zero_point, Scalar output_zero_point, Scalar output_multiplier, Scalar output_shift, *, Tensor(a!) out) -> Tensor(a!)
+- func: cortex_m::quantized_mul.out(Tensor self, int self_zero_point, Tensor other, int other_zero_point, int output_zero_point, int output_multiplier, int output_shift, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   kernels:
     - arg_meta: null
@@ -59,7 +59,7 @@
     - arg_meta: null
       kernel_name: cortex_m::quantized_conv2d_out
 
-- func: cortex_m::quantized_avg_pool2d.out(Tensor input, int[] kernel_size, int[] stride, int[] padding, Scalar zero_point, Scalar multiplier, Scalar shift, *, Tensor(a!) out) -> Tensor(a!)
+- func: cortex_m::quantized_avg_pool2d.out(Tensor input, int[] kernel_size, int[] stride, int[] padding, int zero_point, int multiplier, int shift, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   kernels:
     - arg_meta: null