From d4c8f29c54f1bf45543fffd2d995b06dc015c53b Mon Sep 17 00:00:00 2001 From: Kimish Patel Date: Wed, 11 Jun 2025 13:18:18 -0700 Subject: [PATCH 1/6] [ExecuTorch] Add broadcast support for le op The refactored HF repro requires this to support mask generation. Differential Revision: [D76456398](https://our.internmc.facebook.com/intern/diff/D76456398/) [ghstack-poisoned] --- kernels/optimized/cpu/op_le.cpp | 81 +- kernels/test/op_le_test.cpp | 812 ++++++++++++++++++ .../optimized/op_registration_util.bzl | 3 + 3 files changed, 851 insertions(+), 45 deletions(-) diff --git a/kernels/optimized/cpu/op_le.cpp b/kernels/optimized/cpu/op_le.cpp index 94c2d5ffa76..2513155aac1 100644 --- a/kernels/optimized/cpu/op_le.cpp +++ b/kernels/optimized/cpu/op_le.cpp @@ -6,12 +6,14 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include #include #include #include #include +#include namespace torch { namespace executor { @@ -79,52 +81,41 @@ Tensor& opt_le_tensor_out( return out; } - ET_KERNEL_CHECK(ctx, tensors_have_same_shape(a, b), InvalidArgument, out); - - // Resize for dynamic shape - auto error = resize_tensor(out, a.sizes()); - ET_KERNEL_CHECK_MSG( - ctx, - error == Error::Ok, - InvalidArgument, - out, - "Failed to resize output tensor."); - - if (a_type == b_type && a_type == out_type) { - ET_SWITCH_REAL_TYPES_AND( - Bool, out_type, ctx, "le.Tensor_out", CTYPE, [&]() { - using Vec = executorch::vec::Vectorized; - executorch::vec::map2( - [](Vec x, Vec y) { return x.le(y); }, - out.mutable_data_ptr(), - a.const_data_ptr(), - b.const_data_ptr(), - a.numel()); - }); + // Check for optimized broadcast paths + auto selected_optimized_path = select_optimized_path(a, b, out); + printf("selected_optimized_path: %d\n", static_cast(selected_optimized_path)); + if (selected_optimized_path == ElementwiseOptimizedPath::kTreatAs1d) { + // Resize for dynamic shape + auto error = resize_tensor(out, a.sizes()); + ET_KERNEL_CHECK_MSG( + ctx, + error == 
Error::Ok, + InvalidArgument, + out, + "Failed to resize output tensor."); + + ET_SWITCH_REALB_TYPES(a_type, ctx, "le.Tensor_out", CTYPE, [&]() { + using Vec = executorch::vec::Vectorized; + executorch::vec::map2( + [](Vec x, Vec y) { return x.le(y); }, + out.mutable_data_ptr(), + a.const_data_ptr(), + b.const_data_ptr(), + out.numel()); + }); + } else if (selected_optimized_path != ElementwiseOptimizedPath::kNone) { + // Handle optimized broadcast cases + ET_SWITCH_REALB_TYPES(out_type, ctx, "le.Tensor_out", CTYPE, [&]() { + using Vec = executorch::vec::Vectorized; + auto le_lambda = [](auto x, auto y) { return x.le(y); }; + return torch::executor::handle_broadcast_elementwise( + ctx, le_lambda, a, b, out, selected_optimized_path); + }); } else { - ET_SWITCH_REAL_TYPES_AND( - Bool, a_type, ctx, "le.Tensor_out", CTYPE_A, [&]() { - ET_SWITCH_REAL_TYPES_AND( - Bool, b_type, ctx, "le.Tensor_out", CTYPE_B, [&]() { - using CTYPE_IN = typename torch::executor:: - promote_types::type; - ET_DCHECK( - CppTypeToScalarType::value == - promoteTypes(a_type, b_type)); - ET_SWITCH_REAL_TYPES_AND( - Bool, out_type, ctx, "le.Tensor_out", CTYPE_OUT, [&]() { - const size_t n = a.numel(); - const CTYPE_A* a_data = a.const_data_ptr(); - const CTYPE_B* b_data = b.const_data_ptr(); - CTYPE_OUT* out_data = out.mutable_data_ptr(); - for (auto i = 0; i < n; ++i) { - out_data[i] = static_cast( - static_cast(a_data[i]) <= - static_cast(b_data[i])); - } - }); - }); - }); + // @lint-ignore CLANGTIDY facebook-hte-CArray + static constexpr const char op_name[] = "le.Tensor_out"; + return internal::comparison_tensor_out( + ctx, a, b, out); } return out; diff --git a/kernels/test/op_le_test.cpp b/kernels/test/op_le_test.cpp index bcd40d24d89..eebad43ce75 100644 --- a/kernels/test/op_le_test.cpp +++ b/kernels/test/op_le_test.cpp @@ -186,3 +186,815 @@ TEST_F(OpLeTensorOutTest, BroadcastTest) { op_le_tensor_out(a, b, out); EXPECT_TENSOR_EQ(out, tf.make({1, 4}, {true, true, true, false})); } + 
+TEST_F(OpLeTensorOutTest, Broadcast2DTest) { + TensorFactory tf; + TensorFactory tf_bool; + + // Test case: (1, 10) and (6, 1) -> (6, 10) + Tensor a = + tf.make(/*sizes=*/{1, 10}, /*data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + Tensor b = tf.make({6, 1}, {2, 4, 6, 8, 10, 12}); + + Tensor out = tf_bool.zeros({6, 10}); + + op_le_tensor_out(a, b, out); + + // Expected: each row i should be [1<=b[i], 2<=b[i], ..., 10<=b[i]] + // Row 0: b[0]=2, so [1<=2, 2<=2, 3<=2, ...] = [true, true, false, false, ...] + // Row 1: b[1]=4, so [1<=4, 2<=4, 3<=4, 4<=4, 5<=4, ...] = [true, true, true, + // true, false, ...] + using ctype = executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper::ctype; + std::vector expected_data = { + // Row 0 (b=2): 1<=2, 2<=2, 3<=2, 4<=2, 5<=2, 6<=2, 7<=2, 8<=2, 9<=2, + // 10<=2 + true, + true, + false, + false, + false, + false, + false, + false, + false, + false, + // Row 1 (b=4): 1<=4, 2<=4, 3<=4, 4<=4, 5<=4, 6<=4, 7<=4, 8<=4, 9<=4, + // 10<=4 + true, + true, + true, + true, + false, + false, + false, + false, + false, + false, + // Row 2 (b=6): 1<=6, 2<=6, 3<=6, 4<=6, 5<=6, 6<=6, 7<=6, 8<=6, 9<=6, + // 10<=6 + true, + true, + true, + true, + true, + true, + false, + false, + false, + false, + // Row 3 (b=8): 1<=8, 2<=8, 3<=8, 4<=8, 5<=8, 6<=8, 7<=8, 8<=8, 9<=8, + // 10<=8 + true, + true, + true, + true, + true, + true, + true, + true, + false, + false, + // Row 4 (b=10): 1<=10, 2<=10, 3<=10, 4<=10, 5<=10, 6<=10, 7<=10, 8<=10, + // 9<=10, 10<=10 + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + // Row 5 (b=12): 1<=12, 2<=12, 3<=12, 4<=12, 5<=12, 6<=12, 7<=12, 8<=12, + // 9<=12, 10<=12 + true, + true, + true, + true, + true, + true, + true, + true, + true, + true}; + + EXPECT_TENSOR_EQ( + out, + tf_bool.make( + {6, 10}, expected_data)); +} + +TEST_F(OpLeTensorOutTest, Broadcast1DTo2DTest) { + TensorFactory tf; + TensorFactory tf_bool; + + // Test case: (6,) and (1, 10) -> (6, 10) + Tensor a = tf.make({6, 1}, 
{2, 4, 6, 8, 10, 12}); + Tensor b = + tf.make(/*sizes=*/{1, 10}, /*data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + + Tensor out = tf_bool.zeros({6, 10}); + + op_le_tensor_out(a, b, out); + + using ctype = executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper::ctype; + std::vector expected_data = { + // Row 0 (a=2): 2<=1, 2<=2, 2<=3, 2<=4, 2<=5, 2<=6, 2<=7, 2<=8, 2<=9, + // 2<=10 + false, + true, + true, + true, + true, + true, + true, + true, + true, + true, + // Row 1 (a=4): 4<=1, 4<=2, 4<=3, 4<=4, 4<=5, 4<=6, 4<=7, 4<=8, 4<=9, + // 4<=10 + false, + false, + false, + true, + true, + true, + true, + true, + true, + true, + // Row 2 (a=6): 6<=1, 6<=2, 6<=3, 6<=4, 6<=5, 6<=6, 6<=7, 6<=8, 6<=9, + // 6<=10 + false, + false, + false, + false, + false, + true, + true, + true, + true, + true, + // Row 3 (a=8): 8<=1, 8<=2, 8<=3, 8<=4, 8<=5, 8<=6, 8<=7, 8<=8, 8<=9, + // 8<=10 + false, + false, + false, + false, + false, + false, + false, + true, + true, + true, + // Row 4 (a=10): 10<=1, 10<=2, 10<=3, 10<=4, 10<=5, 10<=6, 10<=7, 10<=8, + // 10<=9, 10<=10 + false, + false, + false, + false, + false, + false, + false, + false, + false, + true, + // Row 5 (a=12): 12<=1, 12<=2, 12<=3, 12<=4, 12<=5, 12<=6, 12<=7, 12<=8, + // 12<=9, 12<=10 + false, + false, + false, + false, + false, + false, + false, + false, + false, + false}; + + EXPECT_TENSOR_EQ( + out, + tf_bool.make( + {6, 10}, expected_data)); +} + +TEST_F(OpLeTensorOutTest, BroadcastReverseTest) { + TensorFactory tf; + TensorFactory tf_bool; + + // Test case: (6, 1) and (1, 10) -> (6, 10) (reverse of the first broadcast + // test) + Tensor a = tf.make(/*sizes=*/{6, 1}, /*data=*/{2, 4, 6, 8, 10, 12}); + Tensor b = tf.make({1, 10}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + + Tensor out = tf_bool.zeros({6, 10}); + + op_le_tensor_out(a, b, out); + + // Expected: each row i should be [a[i]<=1, a[i]<=2, ..., a[i]<=10] + using ctype = executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper::ctype; + std::vector 
expected_data = { + // Row 0 (a=2): 2<=1, 2<=2, 2<=3, 2<=4, 2<=5, 2<=6, 2<=7, 2<=8, 2<=9, + // 2<=10 + false, + true, + true, + true, + true, + true, + true, + true, + true, + true, + // Row 1 (a=4): 4<=1, 4<=2, 4<=3, 4<=4, 4<=5, 4<=6, 4<=7, 4<=8, 4<=9, + // 4<=10 + false, + false, + false, + true, + true, + true, + true, + true, + true, + true, + // Row 2 (a=6): 6<=1, 6<=2, 6<=3, 6<=4, 6<=5, 6<=6, 6<=7, 6<=8, 6<=9, + // 6<=10 + false, + false, + false, + false, + false, + true, + true, + true, + true, + true, + // Row 3 (a=8): 8<=1, 8<=2, 8<=3, 8<=4, 8<=5, 8<=6, 8<=7, 8<=8, 8<=9, + // 8<=10 + false, + false, + false, + false, + false, + false, + false, + true, + true, + true, + // Row 4 (a=10): 10<=1, 10<=2, 10<=3, 10<=4, 10<=5, 10<=6, 10<=7, 10<=8, + // 10<=9, 10<=10 + false, + false, + false, + false, + false, + false, + false, + false, + false, + true, + // Row 5 (a=12): 12<=1, 12<=2, 12<=3, 12<=4, 12<=5, 12<=6, 12<=7, 12<=8, + // 12<=9, 12<=10 + false, + false, + false, + false, + false, + false, + false, + false, + false, + false}; + + EXPECT_TENSOR_EQ( + out, + tf_bool.make( + {6, 10}, expected_data)); +} + +TEST_F(OpLeTensorOutTest, BroadcastLastDimTest) { + TensorFactory tf; + TensorFactory tf_bool; + + // Test case for kBroadcastLastDim: (3, 4, 1) and (3, 4, 5) -> (3, 4, 5) + Tensor a = tf.make( + /*sizes=*/{3, 4, 1}, /*data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + Tensor b = tf.make( + {3, 4, 5}, + { + // First 3x4 slice + 1, + 2, + 3, + 4, + 5, // row 0 + 2, + 3, + 4, + 5, + 6, // row 1 + 3, + 4, + 5, + 6, + 7, // row 2 + 4, + 5, + 6, + 7, + 8, // row 3 + // Second 3x4 slice + 5, + 6, + 7, + 8, + 9, // row 0 + 6, + 7, + 8, + 9, + 10, // row 1 + 7, + 8, + 9, + 10, + 11, // row 2 + 8, + 9, + 10, + 11, + 12, // row 3 + // Third 3x4 slice + 9, + 10, + 11, + 12, + 13, // row 0 + 10, + 11, + 12, + 13, + 14, // row 1 + 11, + 12, + 13, + 14, + 15, // row 2 + 12, + 13, + 14, + 15, + 16 // row 3 + }); + + Tensor out = tf_bool.zeros({3, 4, 5}); + + 
op_le_tensor_out(a, b, out); + + using ctype = executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper::ctype; + std::vector expected_data = { + // First slice: a values are 1,2,3,4 + true, + true, + true, + true, + true, // 1 <= [1,2,3,4,5] + true, + true, + true, + true, + true, // 2 <= [2,3,4,5,6] + true, + true, + true, + true, + true, // 3 <= [3,4,5,6,7] + true, + true, + true, + true, + true, // 4 <= [4,5,6,7,8] + // Second slice: a values are 5,6,7,8 + true, + true, + true, + true, + true, // 5 <= [5,6,7,8,9] + true, + true, + true, + true, + true, // 6 <= [6,7,8,9,10] + true, + true, + true, + true, + true, // 7 <= [7,8,9,10,11] + true, + true, + true, + true, + true, // 8 <= [8,9,10,11,12] + // Third slice: a values are 9,10,11,12 + true, + true, + true, + true, + true, // 9 <= [9,10,11,12,13] + true, + true, + true, + true, + true, // 10 <= [10,11,12,13,14] + true, + true, + true, + true, + true, // 11 <= [11,12,13,14,15] + true, + true, + true, + true, + true// 12 <= [12,13,14,15,16] + }; + + EXPECT_TENSOR_EQ( + out, + tf_bool.make({3, 4, 5}, expected_data)); +} + +TEST_F(OpLeTensorOutTest, BroadcastLastDimReverseTest) { + TensorFactory tf; + TensorFactory tf_bool; + + // Test case for kBroadcastLastDimReverseArguments: (3, 4, 5) and (3, 4, 1) -> + // (3, 4, 5) + Tensor a = tf.make( + {3, 4, 5}, + { + // First 3x4 slice + 1, + 2, + 3, + 4, + 5, // row 0 + 2, + 3, + 4, + 5, + 6, // row 1 + 3, + 4, + 5, + 6, + 7, // row 2 + 4, + 5, + 6, + 7, + 8, // row 3 + // Second 3x4 slice + 5, + 6, + 7, + 8, + 9, // row 0 + 6, + 7, + 8, + 9, + 10, // row 1 + 7, + 8, + 9, + 10, + 11, // row 2 + 8, + 9, + 10, + 11, + 12, // row 3 + // Third 3x4 slice + 9, + 10, + 11, + 12, + 13, // row 0 + 10, + 11, + 12, + 13, + 14, // row 1 + 11, + 12, + 13, + 14, + 15, // row 2 + 12, + 13, + 14, + 15, + 16 // row 3 + }); + Tensor b = tf.make( + /*sizes=*/{3, 4, 1}, + /*data=*/{5, 5, 5, 5, 10, 10, 10, 10, 15, 15, 15, 15}); + + Tensor out = tf_bool.zeros({3, 4, 5}); + + 
op_le_tensor_out(a, b, out); + + using ctype = executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper::ctype; + std::vector expected_data = { + // First slice: b values are all 5 + true, + true, + true, + true, + true, // [1,2,3,4,5] <= 5 + true, + true, + true, + true, + false, // [2,3,4,5,6] <= 5 + true, + true, + true, + false, + false, // [3,4,5,6,7] <= 5 + true, + true, + false, + false, + false, // [4,5,6,7,8] <= 5 + // Second slice: b values are all 10 + true, + true, + true, + true, + true, // [5,6,7,8,9] <= 10 + true, + true, + true, + true, + true, // [6,7,8,9,10] <= 10 + true, + true, + true, + true, + false, // [7,8,9,10,11] <= 10 + true, + true, + true, + false, + false, // [8,9,10,11,12] <= 10 + // Third slice: b values are all 15 + true, + true, + true, + true, + true, // [9,10,11,12,13] <= 15 + true, + true, + true, + true, + true, // [10,11,12,13,14] <= 15 + true, + true, + true, + true, + true, // [11,12,13,14,15] <= 15 + true, + true, + true, + true, + false // [12,13,14,15,16] <= 15 + }; + + EXPECT_TENSOR_EQ( + out, + tf_bool.make({3, 4, 5}, expected_data)); +} + +TEST_F(OpLeTensorOutTest, BroadcastNdByNdTest) { + TensorFactory tf; + TensorFactory tf_bool; + + // Test case for kBroadcastNdByNd: (2, 1, 4) and (2, 3, 4) -> (2, 3, 4) + Tensor a = tf.make(/*sizes=*/{2, 1, 4}, /*data=*/{1, 2, 3, 4, 5, 6, 7, 8}); + Tensor b = tf.make( + {2, 3, 4}, + { + // First 2x3 slice + 1, + 2, + 3, + 4, // row 0 + 2, + 3, + 4, + 5, // row 1 + 3, + 4, + 5, + 6, // row 2 + // Second 2x3 slice + 5, + 6, + 7, + 8, // row 0 + 6, + 7, + 8, + 9, // row 1 + 7, + 8, + 9, + 10 // row 2 + }); + + Tensor out = tf_bool.zeros({2, 3, 4}); + + op_le_tensor_out(a, b, out); + + using ctype = executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper::ctype; + std::vector expected_data = { + // First slice: a[0,0,:] = [1,2,3,4] + true, + true, + true, + true, // [1,2,3,4] <= [1,2,3,4] + true, + true, + true, + true, // [1,2,3,4] <= [2,3,4,5] + true, + true, + 
true, + true, // [1,2,3,4] <= [3,4,5,6] + // Second slice: a[1,0,:] = [5,6,7,8] + true, + true, + true, + true, // [5,6,7,8] <= [5,6,7,8] + true, + true, + true, + true, // [5,6,7,8] <= [6,7,8,9] + true, + true, + true, + true // [5,6,7,8] <= [7,8,9,10] + }; + + EXPECT_TENSOR_EQ( + out, + tf_bool.make({2, 3, 4}, expected_data)); +} + +TEST_F(OpLeTensorOutTest, BroadcastNdByNdReverseTest) { + TensorFactory tf; + TensorFactory tf_bool; + + // Test case for kBroadcastNdByNdReverseArguments: (2, 3, 4) and (2, 1, 4) -> + // (2, 3, 4) + Tensor a = tf.make( + {2, 3, 4}, + { + // First 2x3 slice + 1, + 2, + 3, + 4, // row 0 + 2, + 3, + 4, + 5, // row 1 + 3, + 4, + 5, + 6, // row 2 + // Second 2x3 slice + 5, + 6, + 7, + 8, // row 0 + 6, + 7, + 8, + 9, // row 1 + 7, + 8, + 9, + 10 // row 2 + }); + Tensor b = tf.make(/*sizes=*/{2, 1, 4}, /*data=*/{2, 3, 4, 5, 6, 7, 8, 9}); + + Tensor out = tf_bool.zeros({2, 3, 4}); + + op_le_tensor_out(a, b, out); + + using ctype = executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper::ctype; + std::vector expected_data = { + // First slice: b[0,0,:] = [2,3,4,5] + true, + true, + true, + true, // [1,2,3,4] <= [2,3,4,5] + true, + true, + true, + true, // [2,3,4,5] <= [2,3,4,5] + false, + false, + false, + false, // [3,4,5,6] <= [2,3,4,5] + // Second slice: b[1,0,:] = [6,7,8,9] + true, + true, + true, + true, // [5,6,7,8] <= [6,7,8,9] + true, + true, + true, + true, // [6,7,8,9] <= [6,7,8,9] + false, + false, + false, + false // [7,8,9,10] <= [6,7,8,9] + }; + + EXPECT_TENSOR_EQ( + out, + tf_bool.make({2, 3, 4}, expected_data)); +} + +TEST_F(OpLeTensorOutTest, Broadcast2dBy1dTest) { + TensorFactory tf; + TensorFactory tf_bool; + + // Test case for kBroadcast2dBy1d: (3, 4) and (4,) -> (3, 4) + Tensor a = tf.make( + /*sizes=*/{3, 4}, /*data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + Tensor b = tf.make({4}, {2, 4, 6, 8}); + + Tensor out = tf_bool.zeros({3, 4}); + + op_le_tensor_out(a, b, out); + + using ctype = 
executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper::ctype; + std::vector expected_data = { + true, + true, + true, + true, // [1,2,3,4] <= [2,4,6,8] + false, + false, + false, + true, // [5,6,7,8] <= [2,4,6,8] + false, + false, + false, + false // [9,10,11,12] <= [2,4,6,8] + }; + + EXPECT_TENSOR_EQ( + out, + tf_bool.make({3, 4}, expected_data)); +} + +TEST_F(OpLeTensorOutTest, Broadcast2dBy1dReverseTest) { + TensorFactory tf; + TensorFactory tf_bool; + + // Test case for kBroadcast2dBy1dReverseArguments: (4,) and (3, 4) -> (3, 4) + Tensor a = tf.make({4}, {2, 4, 6, 8}); + Tensor b = tf.make( + /*sizes=*/{3, 4}, /*data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + + Tensor out = tf_bool.zeros({3, 4}); + + op_le_tensor_out(a, b, out); + + using ctype = executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper::ctype; + std::vector expected_data = { + false, + false, + false, + false, // [2,4,6,8] <= [1,2,3,4] + true, + true, + true, + true, // [2,4,6,8] <= [5,6,7,8] + true, + true, + true, + true // [2,4,6,8] <= [9,10,11,12] + }; + + EXPECT_TENSOR_EQ( + out, + tf_bool.make({3, 4}, expected_data)); +} diff --git a/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl b/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl index 433072d78ba..5f09278c500 100644 --- a/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl +++ b/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl @@ -206,8 +206,11 @@ OPTIMIZED_ATEN_OPS = ( op_target( name = "op_le", deps = [ + ":binary_ops", "//executorch/kernels/portable/cpu:scalar_utils", "//executorch/kernels/portable/cpu/util:broadcast_util", + "//executorch/kernels/portable/cpu/pattern:comparison_op", + "//executorch/kernels/portable/cpu/util:elementwise_util", ], ), op_target( From 07cd1023e20c522a9f38d76297415560e3f1d830 Mon Sep 17 00:00:00 2001 From: Kimish Patel Date: Wed, 11 Jun 2025 14:54:11 -0700 Subject: [PATCH 2/6] Update on "[Exutorch] Add 
broadcast support for le op" The refactored HF repro requires this to support mask generation. Differential Revision: [D76456398](https://our.internmc.facebook.com/intern/diff/D76456398/) [ghstack-poisoned] --- kernels/optimized/cpu/op_le.cpp | 6 ++- kernels/test/op_le_test.cpp | 77 ++++++++++++++++----------------- 2 files changed, 41 insertions(+), 42 deletions(-) diff --git a/kernels/optimized/cpu/op_le.cpp b/kernels/optimized/cpu/op_le.cpp index 2513155aac1..095e6fd0cfb 100644 --- a/kernels/optimized/cpu/op_le.cpp +++ b/kernels/optimized/cpu/op_le.cpp @@ -9,11 +9,11 @@ #include #include #include +#include #include #include #include #include -#include namespace torch { namespace executor { @@ -83,7 +83,9 @@ Tensor& opt_le_tensor_out( // Check for optimized broadcast paths auto selected_optimized_path = select_optimized_path(a, b, out); - printf("selected_optimized_path: %d\n", static_cast(selected_optimized_path)); + printf( + "selected_optimized_path: %d\n", + static_cast(selected_optimized_path)); if (selected_optimized_path == ElementwiseOptimizedPath::kTreatAs1d) { // Resize for dynamic shape auto error = resize_tensor(out, a.sizes()); diff --git a/kernels/test/op_le_test.cpp b/kernels/test/op_le_test.cpp index eebad43ce75..070a397dfbc 100644 --- a/kernels/test/op_le_test.cpp +++ b/kernels/test/op_le_test.cpp @@ -204,7 +204,9 @@ TEST_F(OpLeTensorOutTest, Broadcast2DTest) { // Row 0: b[0]=2, so [1<=2, 2<=2, 3<=2, ...] = [true, true, false, false, ...] // Row 1: b[1]=4, so [1<=4, 2<=4, 3<=4, 4<=4, 5<=4, ...] = [true, true, true, // true, false, ...] 
- using ctype = executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper::ctype; + using ctype = + executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper< + ScalarType::Bool>::ctype; std::vector expected_data = { // Row 0 (b=2): 1<=2, 2<=2, 3<=2, 4<=2, 5<=2, 6<=2, 7<=2, 8<=2, 9<=2, // 10<=2 @@ -279,10 +281,7 @@ TEST_F(OpLeTensorOutTest, Broadcast2DTest) { true, true}; - EXPECT_TENSOR_EQ( - out, - tf_bool.make( - {6, 10}, expected_data)); + EXPECT_TENSOR_EQ(out, tf_bool.make({6, 10}, expected_data)); } TEST_F(OpLeTensorOutTest, Broadcast1DTo2DTest) { @@ -298,7 +297,9 @@ TEST_F(OpLeTensorOutTest, Broadcast1DTo2DTest) { op_le_tensor_out(a, b, out); - using ctype = executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper::ctype; + using ctype = + executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper< + ScalarType::Bool>::ctype; std::vector expected_data = { // Row 0 (a=2): 2<=1, 2<=2, 2<=3, 2<=4, 2<=5, 2<=6, 2<=7, 2<=8, 2<=9, // 2<=10 @@ -373,10 +374,7 @@ TEST_F(OpLeTensorOutTest, Broadcast1DTo2DTest) { false, false}; - EXPECT_TENSOR_EQ( - out, - tf_bool.make( - {6, 10}, expected_data)); + EXPECT_TENSOR_EQ(out, tf_bool.make({6, 10}, expected_data)); } TEST_F(OpLeTensorOutTest, BroadcastReverseTest) { @@ -393,7 +391,9 @@ TEST_F(OpLeTensorOutTest, BroadcastReverseTest) { op_le_tensor_out(a, b, out); // Expected: each row i should be [a[i]<=1, a[i]<=2, ..., a[i]<=10] - using ctype = executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper::ctype; + using ctype = + executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper< + ScalarType::Bool>::ctype; std::vector expected_data = { // Row 0 (a=2): 2<=1, 2<=2, 2<=3, 2<=4, 2<=5, 2<=6, 2<=7, 2<=8, 2<=9, // 2<=10 @@ -468,10 +468,7 @@ TEST_F(OpLeTensorOutTest, BroadcastReverseTest) { false, false}; - EXPECT_TENSOR_EQ( - out, - tf_bool.make( - {6, 10}, expected_data)); + EXPECT_TENSOR_EQ(out, tf_bool.make({6, 10}, expected_data)); } TEST_F(OpLeTensorOutTest, 
BroadcastLastDimTest) { @@ -553,7 +550,9 @@ TEST_F(OpLeTensorOutTest, BroadcastLastDimTest) { op_le_tensor_out(a, b, out); - using ctype = executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper::ctype; + using ctype = + executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper< + ScalarType::Bool>::ctype; std::vector expected_data = { // First slice: a values are 1,2,3,4 true, @@ -617,12 +616,10 @@ TEST_F(OpLeTensorOutTest, BroadcastLastDimTest) { true, true, true, - true// 12 <= [12,13,14,15,16] + true // 12 <= [12,13,14,15,16] }; - EXPECT_TENSOR_EQ( - out, - tf_bool.make({3, 4, 5}, expected_data)); + EXPECT_TENSOR_EQ(out, tf_bool.make({3, 4, 5}, expected_data)); } TEST_F(OpLeTensorOutTest, BroadcastLastDimReverseTest) { @@ -706,7 +703,9 @@ TEST_F(OpLeTensorOutTest, BroadcastLastDimReverseTest) { op_le_tensor_out(a, b, out); - using ctype = executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper::ctype; + using ctype = + executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper< + ScalarType::Bool>::ctype; std::vector expected_data = { // First slice: b values are all 5 true, @@ -773,9 +772,7 @@ TEST_F(OpLeTensorOutTest, BroadcastLastDimReverseTest) { false // [12,13,14,15,16] <= 15 }; - EXPECT_TENSOR_EQ( - out, - tf_bool.make({3, 4, 5}, expected_data)); + EXPECT_TENSOR_EQ(out, tf_bool.make({3, 4, 5}, expected_data)); } TEST_F(OpLeTensorOutTest, BroadcastNdByNdTest) { @@ -819,7 +816,9 @@ TEST_F(OpLeTensorOutTest, BroadcastNdByNdTest) { op_le_tensor_out(a, b, out); - using ctype = executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper::ctype; + using ctype = + executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper< + ScalarType::Bool>::ctype; std::vector expected_data = { // First slice: a[0,0,:] = [1,2,3,4] true, @@ -849,9 +848,7 @@ TEST_F(OpLeTensorOutTest, BroadcastNdByNdTest) { true // [5,6,7,8] <= [7,8,9,10] }; - EXPECT_TENSOR_EQ( - out, - tf_bool.make({2, 3, 4}, expected_data)); + 
EXPECT_TENSOR_EQ(out, tf_bool.make({2, 3, 4}, expected_data)); } TEST_F(OpLeTensorOutTest, BroadcastNdByNdReverseTest) { @@ -896,7 +893,9 @@ TEST_F(OpLeTensorOutTest, BroadcastNdByNdReverseTest) { op_le_tensor_out(a, b, out); - using ctype = executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper::ctype; + using ctype = + executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper< + ScalarType::Bool>::ctype; std::vector expected_data = { // First slice: b[0,0,:] = [2,3,4,5] true, @@ -926,9 +925,7 @@ TEST_F(OpLeTensorOutTest, BroadcastNdByNdReverseTest) { false // [7,8,9,10] <= [6,7,8,9] }; - EXPECT_TENSOR_EQ( - out, - tf_bool.make({2, 3, 4}, expected_data)); + EXPECT_TENSOR_EQ(out, tf_bool.make({2, 3, 4}, expected_data)); } TEST_F(OpLeTensorOutTest, Broadcast2dBy1dTest) { @@ -944,7 +941,9 @@ TEST_F(OpLeTensorOutTest, Broadcast2dBy1dTest) { op_le_tensor_out(a, b, out); - using ctype = executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper::ctype; + using ctype = + executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper< + ScalarType::Bool>::ctype; std::vector expected_data = { true, true, @@ -960,9 +959,7 @@ TEST_F(OpLeTensorOutTest, Broadcast2dBy1dTest) { false // [9,10,11,12] <= [2,4,6,8] }; - EXPECT_TENSOR_EQ( - out, - tf_bool.make({3, 4}, expected_data)); + EXPECT_TENSOR_EQ(out, tf_bool.make({3, 4}, expected_data)); } TEST_F(OpLeTensorOutTest, Broadcast2dBy1dReverseTest) { @@ -978,7 +975,9 @@ TEST_F(OpLeTensorOutTest, Broadcast2dBy1dReverseTest) { op_le_tensor_out(a, b, out); - using ctype = executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper::ctype; + using ctype = + executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper< + ScalarType::Bool>::ctype; std::vector expected_data = { false, false, @@ -994,7 +993,5 @@ TEST_F(OpLeTensorOutTest, Broadcast2dBy1dReverseTest) { true // [2,4,6,8] <= [9,10,11,12] }; - EXPECT_TENSOR_EQ( - out, - tf_bool.make({3, 4}, expected_data)); + EXPECT_TENSOR_EQ(out, 
tf_bool.make({3, 4}, expected_data)); } From ccda16eb24ddfae25cab6f4145dedc0dedc9d9ed Mon Sep 17 00:00:00 2001 From: Kimish Patel Date: Wed, 11 Jun 2025 16:16:53 -0700 Subject: [PATCH 3/6] Update on "[ExecuTorch] Add broadcast support for le op" The refactored HF repro requires this to support mask generation. Differential Revision: [D76456398](https://our.internmc.facebook.com/intern/diff/D76456398/) [ghstack-poisoned] --- kernels/optimized/cpu/op_le.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernels/optimized/cpu/op_le.cpp b/kernels/optimized/cpu/op_le.cpp index 095e6fd0cfb..f5dad1c2c98 100644 --- a/kernels/optimized/cpu/op_le.cpp +++ b/kernels/optimized/cpu/op_le.cpp @@ -83,9 +83,6 @@ Tensor& opt_le_tensor_out( // Check for optimized broadcast paths auto selected_optimized_path = select_optimized_path(a, b, out); - printf( - "selected_optimized_path: %d\n", - static_cast(selected_optimized_path)); if (selected_optimized_path == ElementwiseOptimizedPath::kTreatAs1d) { // Resize for dynamic shape auto error = resize_tensor(out, a.sizes()); From eeb437591c2c7516cb309a3a91e1bc4f0cc19faa Mon Sep 17 00:00:00 2001 From: Kimish Patel Date: Wed, 11 Jun 2025 17:34:44 -0700 Subject: [PATCH 4/6] Update on "[ExecuTorch] Add broadcast support for le op" The refactored HF repro requires this to support mask generation. Differential Revision: [D76456398](https://our.internmc.facebook.com/intern/diff/D76456398/) [ghstack-poisoned] --- kernels/optimized/cpu/op_le.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/kernels/optimized/cpu/op_le.cpp b/kernels/optimized/cpu/op_le.cpp index f5dad1c2c98..0603fdf4716 100644 --- a/kernels/optimized/cpu/op_le.cpp +++ b/kernels/optimized/cpu/op_le.cpp @@ -105,7 +105,6 @@ Tensor& opt_le_tensor_out( } else if (selected_optimized_path != ElementwiseOptimizedPath::kNone) { // Handle optimized broadcast cases ET_SWITCH_REALB_TYPES(out_type, ctx, "le.Tensor_out", CTYPE, [&]() { - using Vec = executorch::vec::Vectorized; auto le_lambda = 
[](auto x, auto y) { return x.le(y); }; return torch::executor::handle_broadcast_elementwise( ctx, le_lambda, a, b, out, selected_optimized_path); From 0e17f38c9c2782d31b0491aaf0815f2de4235fee Mon Sep 17 00:00:00 2001 From: Kimish Patel Date: Thu, 12 Jun 2025 10:05:28 -0700 Subject: [PATCH 5/6] Update on "[ExecuTorch] Add broadcast support for le op" The refactored HF repro requires this to support mask generation. Differential Revision: [D76456398](https://our.internmc.facebook.com/intern/diff/D76456398/) [ghstack-poisoned] --- kernels/optimized/cpu/op_le.cpp | 2 +- kernels/test/op_le_test.cpp | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/kernels/optimized/cpu/op_le.cpp b/kernels/optimized/cpu/op_le.cpp index 0603fdf4716..1ce45558c6a 100644 --- a/kernels/optimized/cpu/op_le.cpp +++ b/kernels/optimized/cpu/op_le.cpp @@ -85,7 +85,7 @@ Tensor& opt_le_tensor_out( auto selected_optimized_path = select_optimized_path(a, b, out); if (selected_optimized_path == ElementwiseOptimizedPath::kTreatAs1d) { // Resize for dynamic shape - auto error = resize_tensor(out, a.sizes()); + auto error = resize_to_broadcast_target_size(a, b, out); ET_KERNEL_CHECK_MSG( ctx, error == Error::Ok, diff --git a/kernels/test/op_le_test.cpp b/kernels/test/op_le_test.cpp index 070a397dfbc..d96d87be596 100644 --- a/kernels/test/op_le_test.cpp +++ b/kernels/test/op_le_test.cpp @@ -962,6 +962,35 @@ TEST_F(OpLeTensorOutTest, Broadcast2dBy1dTest) { EXPECT_TENSOR_EQ(out, tf_bool.make({3, 4}, expected_data)); } +TEST_F(OpLeTensorOutTest, Broadcast1DTo2DShapeTest) { + TensorFactory tf; + TensorFactory tf_bool; + + // Test case: (6,) and (1, 6) -> (1, 6) + Tensor a = tf.make({6}, {1, 3, 5, 7, 9, 11}); + Tensor b = tf.make({1, 6}, {2, 4, 6, 8, 10, 12}); + + Tensor out = tf_bool.zeros({1, 6}); + + op_le_tensor_out(a, b, out); + + // Expected: a[i] <= b[0,i] for all i + // [1, 3, 5, 7, 9, 11] <= [2, 4, 6, 8, 10, 12] + using ctype = + 
executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper< + ScalarType::Bool>::ctype; + std::vector expected_data = { + true, // 1 <= 2 + true, // 3 <= 4 + true, // 5 <= 6 + true, // 7 <= 8 + true, // 9 <= 10 + true // 11 <= 12 + }; + + EXPECT_TENSOR_EQ(out, tf_bool.make({1, 6}, expected_data)); +} + TEST_F(OpLeTensorOutTest, Broadcast2dBy1dReverseTest) { TensorFactory tf; TensorFactory tf_bool; From 35c8c8d0fa8f5a72a5027738870b3090d640ff2d Mon Sep 17 00:00:00 2001 From: Kimish Patel Date: Thu, 12 Jun 2025 10:07:12 -0700 Subject: [PATCH 6/6] Update on "[ExecuTorch] Add broadcast support for le op" The refactored HF repro requires this to support mask generation. Differential Revision: [D76456398](https://our.internmc.facebook.com/intern/diff/D76456398/) [ghstack-poisoned] --- kernels/test/op_le_test.cpp | 90 ++++++++++++++++++++++++++++++++++++- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/kernels/test/op_le_test.cpp b/kernels/test/op_le_test.cpp index d96d87be596..d8ecec11c46 100644 --- a/kernels/test/op_le_test.cpp +++ b/kernels/test/op_le_test.cpp @@ -991,7 +991,95 @@ TEST_F(OpLeTensorOutTest, Broadcast1DTo2DShapeTest) { EXPECT_TENSOR_EQ(out, tf_bool.make({1, 6}, expected_data)); } -TEST_F(OpLeTensorOutTest, Broadcast2dBy1dReverseTest) { +TEST_F(OpLeTensorOutTest, Broadcast2DBy1DShapeTest) { + TensorFactory tf; + TensorFactory tf_bool; + + // Test case: (10,) and (6, 1) -> (6, 10) + Tensor a = tf.make({10}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + Tensor b = tf.make({6, 1}, {2, 4, 6, 8, 10, 12}); + + Tensor out = tf_bool.zeros({6, 10}); + + op_le_tensor_out(a, b, out); + + // Expected: a[j] <= b[i,0] for all i,j + // Each row i should be [a[0]<=b[i,0], a[1]<=b[i,0], ..., a[9]<=b[i,0]] + using ctype = + executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper< + ScalarType::Bool>::ctype; + std::vector expected_data = { + // Row 0 (b=2): [1,2,3,4,5,6,7,8,9,10] <= 2 + true, + true, + false, + false, + false, + false, + false, + false, + 
false, + false, + // Row 1 (b=4): [1,2,3,4,5,6,7,8,9,10] <= 4 + true, + true, + true, + true, + false, + false, + false, + false, + false, + false, + // Row 2 (b=6): [1,2,3,4,5,6,7,8,9,10] <= 6 + true, + true, + true, + true, + true, + true, + false, + false, + false, + false, + // Row 3 (b=8): [1,2,3,4,5,6,7,8,9,10] <= 8 + true, + true, + true, + true, + true, + true, + true, + true, + false, + false, + // Row 4 (b=10): [1,2,3,4,5,6,7,8,9,10] <= 10 + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + // Row 5 (b=12): [1,2,3,4,5,6,7,8,9,10] <= 12 + true, + true, + true, + true, + true, + true, + true, + true, + true, + true}; + + EXPECT_TENSOR_EQ(out, tf_bool.make({6, 10}, expected_data)); +} + +TEST_F(OpLeTensorOutTest, Broadcast22dBy1dReverseTest) { TensorFactory tf; TensorFactory tf_bool;