diff --git a/backends/cortex_m/ops/cortex_m_ops_common.h b/backends/cortex_m/ops/cortex_m_ops_common.h index 71cf718c9a8..8730c9490bd 100644 --- a/backends/cortex_m/ops/cortex_m_ops_common.h +++ b/backends/cortex_m/ops/cortex_m_ops_common.h @@ -74,26 +74,22 @@ inline void validate_cmsis_nn_tensor_requirements( } inline void validate_single_quant_params( - const Scalar& zero_point, - const Scalar& multiplier, - const Scalar& shift, + const int64_t zero_point, + const int64_t multiplier, + const int64_t shift, const char* param_name) { - int64_t zp_val = zero_point.to<int64_t>(); - int64_t mult_val = multiplier.to<int64_t>(); - int64_t shift_val = shift.to<int64_t>(); - ET_CHECK_MSG( - mult_val >= std::numeric_limits<int32_t>::min() && - mult_val <= std::numeric_limits<int32_t>::max(), + multiplier >= std::numeric_limits<int32_t>::min() && + multiplier <= std::numeric_limits<int32_t>::max(), "%s multiplier must be in int32 range [Value: %d]", param_name, - mult_val); + static_cast<int>(multiplier)); ET_CHECK_MSG( - shift_val >= -31 && shift_val <= 31, + shift >= -31 && shift <= 31, "%s shift must be in range [-31, 31] [Value: %d]", param_name, - shift_val); + static_cast<int>(shift)); } /** @@ -108,15 +104,15 @@ inline void validate_single_quant_params( * Raises errors via ET_KERNEL_CHECK if any check fails. 
*/ inline void validate_quantization_params( - const Scalar& zero_point1, - const Scalar& multiplier1, - const Scalar& shift1, - const Scalar& zero_point2, - const Scalar& multiplier2, - const Scalar& shift2, - const Scalar& output_zero_point, - const Scalar& output_multiplier, - const Scalar& output_shift, + const int64_t zero_point1, + const int64_t multiplier1, + const int64_t shift1, + const int64_t zero_point2, + const int64_t multiplier2, + const int64_t shift2, + const int64_t output_zero_point, + const int64_t output_multiplier, + const int64_t output_shift, Tensor& output) { validate_single_quant_params( zero_point1, multiplier1, shift1, "Single quant Input1"); @@ -171,8 +167,8 @@ inline bool is_channel_broadcast(const Tensor& tensor1, const Tensor& tensor2) { // multiplier: Range {ARM_NN_Q31_MIN + 1, Q32_MAX} // shift : Range {-31, 30} inline bool validate_per_channel_quant_params( - const int32_t* multipliers, - const int32_t* shifts, + const IntArrayRef multipliers, + const IntArrayRef shifts, int num_channels) { for (int i = 0; i < num_channels; ++i) { // Multiplier: {ARM_NN_Q31_MIN + 1, ARM_NN_Q31_MAX} diff --git a/backends/cortex_m/ops/op_quantized_add.cpp b/backends/cortex_m/ops/op_quantized_add.cpp index 019ab4cfb58..440af0d7eaf 100644 --- a/backends/cortex_m/ops/op_quantized_add.cpp +++ b/backends/cortex_m/ops/op_quantized_add.cpp @@ -21,16 +21,16 @@ using KernelRuntimeContext = torch::executor::KernelRuntimeContext; Tensor& quantized_add_out( KernelRuntimeContext& context, const Tensor& input1_int8, - const Scalar& input1_zero_point, - const Scalar& input1_multiplier, - const Scalar& input1_shift, + const int64_t input1_zero_point, + const int64_t input1_multiplier, + const int64_t input1_shift, const Tensor& input2_int8, - const Scalar& input2_zero_point, - const Scalar& input2_multiplier, - const Scalar& input2_shift, - const Scalar& output_zero_point, - const Scalar& output_multiplier, - const Scalar& output_shift, + const int64_t 
input2_zero_point, + const int64_t input2_multiplier, + const int64_t input2_shift, + const int64_t output_zero_point, + const int64_t output_multiplier, + const int64_t output_shift, Tensor& out) { // Validate tensor types and dim order bool channel_broadcast = is_channel_broadcast(input1_int8, input2_int8); diff --git a/backends/cortex_m/ops/op_quantized_avg_pool2d.cpp b/backends/cortex_m/ops/op_quantized_avg_pool2d.cpp index dfdedbd8838..c70487c7c5e 100644 --- a/backends/cortex_m/ops/op_quantized_avg_pool2d.cpp +++ b/backends/cortex_m/ops/op_quantized_avg_pool2d.cpp @@ -22,9 +22,9 @@ Tensor& quantized_avg_pool2d_out( const IntArrayRef kernel_size, const IntArrayRef stride, const IntArrayRef padding, - const Scalar& zero_point, - const Scalar& multiplier, - const Scalar& shift, + const int64_t zero_point, + const int64_t multiplier, + const int64_t shift, Tensor& out) { if (input.dim() != 4 || out.dim() != 4) { ET_LOG(Error, "quantized_avg_pool2d_out: tensors must be 4-D"); diff --git a/backends/cortex_m/ops/op_quantized_mul.cpp b/backends/cortex_m/ops/op_quantized_mul.cpp index 3d2d7657e36..bb346857117 100644 --- a/backends/cortex_m/ops/op_quantized_mul.cpp +++ b/backends/cortex_m/ops/op_quantized_mul.cpp @@ -26,12 +26,12 @@ using KernelRuntimeContext = torch::executor::KernelRuntimeContext; Tensor& quantized_mul_out( KernelRuntimeContext& context, const Tensor& input1_int8, - const Scalar& input1_zero_point, + const int64_t input1_zero_point, const Tensor& input2_int8, - const Scalar& input2_zero_point, - const Scalar& output_zero_point, - const Scalar& output_multiplier, - const Scalar& output_shift, + const int64_t input2_zero_point, + const int64_t output_zero_point, + const int64_t output_multiplier, + const int64_t output_shift, Tensor& out) { // Validate tensor types and quantization parameters @@ -44,8 +44,8 @@ Tensor& quantized_mul_out( /*require_channels_last=*/channel_broadcast, /*require_same_sizes=*/!channel_broadcast); - const Scalar 
kIdentityMultiplier(/*value=*/1); - const Scalar kZeroShift(/*value=*/0); + const int32_t kIdentityMultiplier(/*value=*/1); + const int32_t kZeroShift(/*value=*/0); validate_quantization_params( input1_zero_point, kIdentityMultiplier, diff --git a/backends/cortex_m/ops/operators.py b/backends/cortex_m/ops/operators.py index a33703489fd..c692c457a84 100644 --- a/backends/cortex_m/ops/operators.py +++ b/backends/cortex_m/ops/operators.py @@ -114,17 +114,17 @@ def dequantize_per_tensor_impl( # Define the operator schema with multipliers and shifts (11 args) lib.define( "quantized_add(" - "Tensor self, Scalar self_zero_point, Scalar self_multiplier, Scalar self_shift, " - "Tensor other, Scalar other_zero_point, Scalar other_multiplier, Scalar other_shift, " - "Scalar output_zero_point, Scalar output_multiplier, Scalar output_shift) -> Tensor" + "Tensor self, int self_zero_point, int self_multiplier, int self_shift, " + "Tensor other, int other_zero_point, int other_multiplier, int other_shift, " + "int output_zero_point, int output_multiplier, int output_shift) -> Tensor" ) # Define the operator schema with multipliers and shifts (11 args + out tensor) lib.define( "quantized_add.out(" - "Tensor self, Scalar self_zero_point, Scalar self_multiplier, Scalar self_shift, " - "Tensor other, Scalar other_zero_point, Scalar other_multiplier, Scalar other_shift, " - "Scalar output_zero_point, Scalar output_multiplier, Scalar output_shift, " + "Tensor self, int self_zero_point, int self_multiplier, int self_shift, " + "Tensor other, int other_zero_point, int other_multiplier, int other_shift, " + "int output_zero_point, int output_multiplier, int output_shift, " "*, Tensor(a!) 
out) -> Tensor(a!)" ) @@ -591,9 +591,9 @@ def quantized_conv2d_impl( "int[] kernel_size, " "int[] stride, " "int[] padding, " - "Scalar zero_point, " - "Scalar multiplier, " - "Scalar shift" + "int zero_point, " + "int multiplier, " + "int shift" ") -> Tensor" ) lib.define( @@ -602,9 +602,9 @@ def quantized_conv2d_impl( "int[] kernel_size, " "int[] stride, " "int[] padding, " - "Scalar zero_point, " - "Scalar multiplier, " - "Scalar shift, " + "int zero_point, " + "int multiplier, " + "int shift, " "*, Tensor(a!) out) -> Tensor(a!)" ) diff --git a/backends/cortex_m/ops/operators.yaml b/backends/cortex_m/ops/operators.yaml index 7c8389a8754..d8b609787cf 100644 --- a/backends/cortex_m/ops/operators.yaml +++ b/backends/cortex_m/ops/operators.yaml @@ -17,13 +17,13 @@ - arg_meta: null kernel_name: cortex_m::dequantize_per_tensor_out -- func: cortex_m::quantized_add.out(Tensor self, Scalar self_zero_point, Scalar self_multiplier, Scalar self_shift, Tensor other, Scalar other_zero_point, Scalar other_multiplier, Scalar other_shift, Scalar output_zero_point, Scalar output_multiplier, Scalar output_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cortex_m::quantized_add.out(Tensor self, int self_zero_point, int self_multiplier, int self_shift, Tensor other, int other_zero_point, int other_multiplier, int other_shift, int output_zero_point, int output_multiplier, int output_shift, *, Tensor(a!) out) -> Tensor(a!) variants: function kernels: - arg_meta: null kernel_name: cortex_m::quantized_add_out -- func: cortex_m::quantized_mul.out(Tensor self, Scalar self_zero_point, Tensor other, Scalar other_zero_point, Scalar output_zero_point, Scalar output_multiplier, Scalar output_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cortex_m::quantized_mul.out(Tensor self, int self_zero_point, Tensor other, int other_zero_point, int output_zero_point, int output_multiplier, int output_shift, *, Tensor(a!) out) -> Tensor(a!) 
variants: function kernels: - arg_meta: null @@ -59,7 +59,7 @@ - arg_meta: null kernel_name: cortex_m::quantized_conv2d_out -- func: cortex_m::quantized_avg_pool2d.out(Tensor input, int[] kernel_size, int[] stride, int[] padding, Scalar zero_point, Scalar multiplier, Scalar shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cortex_m::quantized_avg_pool2d.out(Tensor input, int[] kernel_size, int[] stride, int[] padding, int zero_point, int multiplier, int shift, *, Tensor(a!) out) -> Tensor(a!) variants: function kernels: - arg_meta: null