Fix sign-compare errors in quantized cpu kernels for Zephyr/GCC builds

karan1508 · facebook-github-bot · commit 2ac5836c00fb · 2026-05-19T08:35:17.000-07:00
Summary:
## Why?
The Zephyr firmware target `tycho_t3c-tsn_graph_simulator_mps3_an547_cmake` failed
to compile `xplat/executorch/kernels/quantized/cpu/embeddingxb.cpp` under
`-Werror=sign-compare`. The loop variable was declared `size_t` (unsigned) but
compared against `Tensor::dim()`, which returns `ssize_t` (signed). On 64-bit
host toolchains the warning is often suppressed or both types match in width, so
the regression slipped in via D90402567 (int32 indices support). On the 32-bit
Zephyr ARM target the mismatch is fatal. Several sibling kernels in the same
directory have the identical latent pattern and would break the moment they get
pulled into a Zephyr build path.

## What?
Match the loop variable's type to the signed return type of `Tensor::dim()` by
switching `size_t` to `ssize_t` everywhere this pattern appears in the quantized
cpu kernels and two test files. This is also the safer pattern — casting
`dim()` to `size_t` would turn a negative value into a huge unsigned one (e.g.
`-1` becomes `SIZE_MAX`), so the loop would run past the end of the fixed-size
stack buffer used for sizes. The change is mirrored across the
xplat and fbcode copies so the diff_train sync stays consistent.

Differential Revision: D105701588
diff --git a/kernels/quantized/cpu/embeddingxb.cpp b/kernels/quantized/cpu/embeddingxb.cpp
@@ -224,7 +224,7 @@ void resize_out_tensor(
     Tensor& out,
     int weight_nbit) {
   executorch::aten::SizesType expected_output_size[kTensorDimensionLimit];
-  for (size_t i = 0; i < indices.dim(); i++) {
+  for (ssize_t i = 0; i < indices.dim(); i++) {
     expected_output_size[i] = indices.size(i);
   }
   const size_t embedding_dim = get_embedding_dim(weight.size(1), weight_nbit);
diff --git a/kernels/quantized/cpu/op_dequantize.cpp b/kernels/quantized/cpu/op_dequantize.cpp
@@ -590,7 +590,7 @@ Tensor& dequantize_per_token_out(
     Tensor& out) {
   // Refactor this into a util
   size_t num_channels = 1;
-  for (size_t i = 0; i < input.dim() - 1; i++) {
+  for (ssize_t i = 0; i < input.dim() - 1; i++) {
     num_channels *= input.size(i);
   }
   // This unfortunate change is needed because we compile op_quantize for aten
diff --git a/kernels/quantized/cpu/op_embedding.cpp b/kernels/quantized/cpu/op_embedding.cpp
@@ -200,7 +200,7 @@ void resize_out_tensor(
     const Tensor& indices,
     Tensor& out) {
   executorch::aten::SizesType expected_output_size[kTensorDimensionLimit];
-  for (size_t i = 0; i < indices.dim(); i++) {
+  for (ssize_t i = 0; i < indices.dim(); i++) {
     expected_output_size[i] = indices.size(i);
   }
   const size_t embedding_dim = weight.size(1);
diff --git a/kernels/quantized/cpu/op_quantize.cpp b/kernels/quantized/cpu/op_quantize.cpp
@@ -642,7 +642,7 @@ Tensor& quantize_per_token_out(
     ScalarType dtype,
     Tensor& out) {
   size_t num_tokens = 1;
-  for (size_t i = 0; i < input.dim() - 1; i++) {
+  for (ssize_t i = 0; i < input.dim() - 1; i++) {
     num_tokens *= input.size(i);
   }
 // This unfortunate change is needed because we compile op_quantize for aten
diff --git a/kernels/test/custom_kernel_example/op_relu.cpp b/kernels/test/custom_kernel_example/op_relu.cpp
@@ -38,7 +38,7 @@ void relu(const Tensor& input, Tensor& output) {
   CTYPE* out_data = output.data_ptr<CTYPE>();
   size_t lim = input.numel();
   Tensor::SizesType expected_output_size[16];
-  for (size_t i = 0; i < output.dim(); ++i) {
+  for (ssize_t i = 0; i < output.dim(); ++i) {
     expected_output_size[i] = input.size(i);
   }
   auto error = resize_tensor(
diff --git a/kernels/test/op_split_copy_test.cpp b/kernels/test/op_split_copy_test.cpp
@@ -288,7 +288,7 @@ TEST_F(OpSplitCopyTensorOutTest, LargerSplitSizeDoesNothing) {
   std::vector<Tensor> expected_out = {input};
 
   for (int64_t split_size = 3; split_size < 6; ++split_size) {
-    for (size_t dim = 0; dim < input.dim(); ++dim) {
+    for (ssize_t dim = 0; dim < input.dim(); ++dim) {
       TensorList out = tlf.zeros_like({input});
       op_split_copy_tensor_out(input, split_size, dim, out);
       EXPECT_TENSOR_LISTS_EQ(out, expected_out);

Original file line number	Diff line number	Diff line change
`@@ -224,7 +224,7 @@ void resize_out_tensor(`
`224`	`224`	`Tensor& out,`
`225`	`225`	`int weight_nbit) {`
`226`	`226`	`executorch::aten::SizesType expected_output_size[kTensorDimensionLimit];`
`227`		`- for (size_t i = 0; i < indices.dim(); i++) {`
	`227`	`+ for (ssize_t i = 0; i < indices.dim(); i++) {`
`228`	`228`	`expected_output_size[i] = indices.size(i);`
`229`	`229`	`}`
`230`	`230`	`const size_t embedding_dim = get_embedding_dim(weight.size(1), weight_nbit);`
Original file line number	Diff line number	Diff line change
`@@ -590,7 +590,7 @@ Tensor& dequantize_per_token_out(`
`590`	`590`	`Tensor& out) {`
`591`	`591`	`// Refactor this into a util`
`592`	`592`	`size_t num_channels = 1;`
`593`		`- for (size_t i = 0; i < input.dim() - 1; i++) {`
	`593`	`+ for (ssize_t i = 0; i < input.dim() - 1; i++) {`
`594`	`594`	`num_channels *= input.size(i);`
`595`	`595`	`}`
`596`	`596`	`// This unfortunate change is needed because we compile op_quantize for aten`
Original file line number	Diff line number	Diff line change
`@@ -200,7 +200,7 @@ void resize_out_tensor(`
`200`	`200`	`const Tensor& indices,`
`201`	`201`	`Tensor& out) {`
`202`	`202`	`executorch::aten::SizesType expected_output_size[kTensorDimensionLimit];`
`203`		`- for (size_t i = 0; i < indices.dim(); i++) {`
	`203`	`+ for (ssize_t i = 0; i < indices.dim(); i++) {`
`204`	`204`	`expected_output_size[i] = indices.size(i);`
`205`	`205`	`}`
`206`	`206`	`const size_t embedding_dim = weight.size(1);`
Original file line number	Diff line number	Diff line change
`@@ -642,7 +642,7 @@ Tensor& quantize_per_token_out(`
`642`	`642`	`ScalarType dtype,`
`643`	`643`	`Tensor& out) {`
`644`	`644`	`size_t num_tokens = 1;`
`645`		`- for (size_t i = 0; i < input.dim() - 1; i++) {`
	`645`	`+ for (ssize_t i = 0; i < input.dim() - 1; i++) {`
`646`	`646`	`num_tokens *= input.size(i);`
`647`	`647`	`}`
`648`	`648`	`// This unfortunate change is needed because we compile op_quantize for aten`
Original file line number	Diff line number	Diff line change
`@@ -38,7 +38,7 @@ void relu(const Tensor& input, Tensor& output) {`
`38`	`38`	`CTYPE* out_data = output.data_ptr<CTYPE>();`
`39`	`39`	`size_t lim = input.numel();`
`40`	`40`	`Tensor::SizesType expected_output_size[16];`
`41`		`- for (size_t i = 0; i < output.dim(); ++i) {`
	`41`	`+ for (ssize_t i = 0; i < output.dim(); ++i) {`
`42`	`42`	`expected_output_size[i] = input.size(i);`
`43`	`43`	`}`
`44`	`44`	`auto error = resize_tensor(`