diff --git a/kernels/optimized/cpu/op_le.cpp b/kernels/optimized/cpu/op_le.cpp
index 8cadd40a8da..4aeeb69323f 100644
--- a/kernels/optimized/cpu/op_le.cpp
+++ b/kernels/optimized/cpu/op_le.cpp
@@ -8,6 +8,8 @@
 #include
 #include
+#include <executorch/kernels/optimized/cpu/binary_ops.h>
+#include <executorch/kernels/portable/cpu/pattern/comparison_op.h>
 #include
 #include
 #include
@@ -79,52 +81,39 @@ Tensor& opt_le_tensor_out(
     return out;
   }

-  ET_KERNEL_CHECK(ctx, tensors_have_same_shape(a, b), InvalidArgument, out);
-
-  // Resize for dynamic shape
-  auto error = resize_tensor(out, a.sizes());
-  ET_KERNEL_CHECK_MSG(
-      ctx,
-      error == Error::Ok,
-      InvalidArgument,
-      out,
-      "Failed to resize output tensor.");
-
-  if (a_type == b_type && a_type == out_type) {
-    ET_SWITCH_REAL_TYPES_AND(
-        Bool, out_type, ctx, "le.Tensor_out", CTYPE, [&]() {
-          using Vec = at::vec::Vectorized<CTYPE>;
-          at::vec::map2<CTYPE>(
-              [](Vec x, Vec y) { return x.le(y); },
-              out.mutable_data_ptr<CTYPE>(),
-              a.const_data_ptr<CTYPE>(),
-              b.const_data_ptr<CTYPE>(),
-              a.numel());
-        });
+  // Check for optimized broadcast paths
+  auto selected_optimized_path = select_optimized_path(a, b, out);
+  if (selected_optimized_path == ElementwiseOptimizedPath::kTreatAs1d) {
+    // Resize for dynamic shape
+    auto error = resize_to_broadcast_target_size(a, b, out);
+    ET_KERNEL_CHECK_MSG(
+        ctx,
+        error == Error::Ok,
+        InvalidArgument,
+        out,
+        "Failed to resize output tensor.");
+
+    ET_SWITCH_REALB_TYPES(a_type, ctx, "le.Tensor_out", CTYPE, [&]() {
+      using Vec = at::vec::Vectorized<CTYPE>;
+      at::vec::map2<CTYPE>(
+          [](Vec x, Vec y) { return x.le(y); },
+          out.mutable_data_ptr<CTYPE>(),
+          a.const_data_ptr<CTYPE>(),
+          b.const_data_ptr<CTYPE>(),
+          out.numel());
+    });
+  } else if (selected_optimized_path != ElementwiseOptimizedPath::kNone) {
+    // Handle optimized broadcast cases
+    ET_SWITCH_REALB_TYPES(out_type, ctx, "le.Tensor_out", CTYPE, [&]() {
+      auto le_lambda = [](auto x, auto y) { return x.le(y); };
+      return torch::executor::handle_broadcast_elementwise<CTYPE>(
+          ctx, le_lambda, a, b, out, selected_optimized_path);
+    });
   } else {
-    ET_SWITCH_REAL_TYPES_AND(
-        Bool, a_type, ctx, "le.Tensor_out", CTYPE_A, [&]() {
-          ET_SWITCH_REAL_TYPES_AND(
-              Bool, b_type, ctx, "le.Tensor_out", CTYPE_B, [&]() {
-                using CTYPE_IN = typename torch::executor::
-                    promote_types<CTYPE_A, CTYPE_B>::type;
-                ET_DCHECK(
-                    CppTypeToScalarType<CTYPE_IN>::value ==
-                    promoteTypes(a_type, b_type));
-                ET_SWITCH_REAL_TYPES_AND(
-                    Bool, out_type, ctx, "le.Tensor_out", CTYPE_OUT, [&]() {
-                      const size_t n = a.numel();
-                      const CTYPE_A* a_data = a.const_data_ptr<CTYPE_A>();
-                      const CTYPE_B* b_data = b.const_data_ptr<CTYPE_B>();
-                      CTYPE_OUT* out_data = out.mutable_data_ptr<CTYPE_OUT>();
-                      for (auto i = 0; i < n; ++i) {
-                        out_data[i] = static_cast<CTYPE_OUT>(
-                            static_cast<CTYPE_IN>(a_data[i]) <=
-                            static_cast<CTYPE_IN>(b_data[i]));
-                      }
-                    });
-              });
-        });
+    // @lint-ignore CLANGTIDY facebook-hte-CArray
+    static constexpr const char op_name[] = "le.Tensor_out";
+    return internal::comparison_tensor_out<std::less_equal, op_name>(
+        ctx, a, b, out);
   }

   return out;
diff --git a/kernels/test/op_le_test.cpp b/kernels/test/op_le_test.cpp
index bcd40d24d89..d8ecec11c46 100644
--- a/kernels/test/op_le_test.cpp
+++ b/kernels/test/op_le_test.cpp
@@ -186,3 +186,929 @@ TEST_F(OpLeTensorOutTest, BroadcastTest) {
   op_le_tensor_out(a, b, out);
   EXPECT_TENSOR_EQ(out, tf.make({1, 4}, {true, true, true, false}));
 }
+
+TEST_F(OpLeTensorOutTest, Broadcast2DTest) {
+  TensorFactory<ScalarType::Int> tf;
+  TensorFactory<ScalarType::Bool> tf_bool;
+
+  // Test case: (1, 10) and (6, 1) -> (6, 10)
+  Tensor a =
+      tf.make(/*sizes=*/{1, 10}, /*data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10});
+  Tensor b = tf.make({6, 1}, {2, 4, 6, 8, 10, 12});
+
+  Tensor out = tf_bool.zeros({6, 10});
+
+  op_le_tensor_out(a, b, out);
+
+  // Expected: each row i should be
+  // [1<=b[i], 2<=b[i], ..., 10<=b[i]]
+  // Row 0: b[0]=2, so [1<=2, 2<=2, 3<=2, ...] = [true, true, false, false, ...]
+  // Row 1: b[1]=4, so [1<=4, 2<=4, 3<=4, 4<=4, 5<=4, ...] = [true, true, true, true, false, ...]
+  using ctype =
+      executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper<
+          ScalarType::Bool>::ctype;
+  std::vector<ctype> expected_data = {
+      // Row 0 (b=2): 1<=2, 2<=2, 3<=2, 4<=2, 5<=2, 6<=2, 7<=2, 8<=2, 9<=2, 10<=2
+      true, true, false, false, false, false, false, false, false, false,
+      // Row 1 (b=4): 1<=4, 2<=4, 3<=4, 4<=4, 5<=4, 6<=4, 7<=4, 8<=4, 9<=4, 10<=4
+      true, true, true, true, false, false, false, false, false, false,
+      // Row 2 (b=6): 1<=6, 2<=6, 3<=6, 4<=6, 5<=6, 6<=6, 7<=6, 8<=6, 9<=6, 10<=6
+      true, true, true, true, true, true, false, false, false, false,
+      // Row 3 (b=8): 1<=8, 2<=8, 3<=8, 4<=8, 5<=8, 6<=8, 7<=8, 8<=8, 9<=8, 10<=8
+      true, true, true, true, true, true, true, true, false, false,
+      // Row 4 (b=10): 1<=10, 2<=10, 3<=10, 4<=10, 5<=10, 6<=10, 7<=10, 8<=10, 9<=10, 10<=10
+      true, true, true, true, true, true, true, true, true, true,
+      // Row 5 (b=12): 1<=12, 2<=12, 3<=12, 4<=12, 5<=12, 6<=12, 7<=12, 8<=12, 9<=12, 10<=12
+      true, true, true, true, true, true, true, true, true, true};
+
+  EXPECT_TENSOR_EQ(out, tf_bool.make({6, 10}, expected_data));
+}
+
+TEST_F(OpLeTensorOutTest, Broadcast1DTo2DTest) {
+  TensorFactory<ScalarType::Int> tf;
+  TensorFactory<ScalarType::Bool> tf_bool;
+
+  // Test case: (6, 1) and (1, 10) -> (6, 10)
+  Tensor a = tf.make({6, 1}, {2, 4, 6, 8, 10, 12});
+  Tensor b =
+      tf.make(/*sizes=*/{1, 10}, /*data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10});
+
+  Tensor out = tf_bool.zeros({6, 10});
+
+  op_le_tensor_out(a, b, out);
+
+  using ctype =
+      executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper<
+          ScalarType::Bool>::ctype;
+  std::vector<ctype> expected_data = {
+      // Row 0 (a=2): 2<=1, 2<=2, 2<=3, 2<=4, 2<=5, 2<=6, 2<=7, 2<=8, 2<=9, 2<=10
+      false, true, true, true, true, true, true, true, true, true,
+      // Row 1 (a=4): 4<=1, 4<=2, 4<=3, 4<=4, 4<=5, 4<=6, 4<=7, 4<=8, 4<=9, 4<=10
+      false, false, false, true, true, true, true, true, true, true,
+      // Row 2 (a=6): 6<=1, 6<=2, 6<=3, 6<=4, 6<=5, 6<=6, 6<=7, 6<=8, 6<=9, 6<=10
+      false, false, false, false, false, true, true, true, true, true,
+      // Row 3 (a=8): 8<=1, 8<=2, 8<=3, 8<=4, 8<=5, 8<=6, 8<=7, 8<=8, 8<=9, 8<=10
+      false, false, false, false, false, false, false, true, true, true,
+      // Row 4 (a=10): 10<=1, 10<=2, 10<=3, 10<=4, 10<=5, 10<=6, 10<=7, 10<=8, 10<=9, 10<=10
+      false, false, false, false, false, false, false, false, false, true,
+      // Row 5 (a=12): 12<=1, 12<=2, 12<=3, 12<=4, 12<=5, 12<=6, 12<=7, 12<=8, 12<=9, 12<=10
+      false, false, false, false, false, false, false, false, false, false};
+
+  EXPECT_TENSOR_EQ(out, tf_bool.make({6, 10}, expected_data));
+}
+
+TEST_F(OpLeTensorOutTest, BroadcastReverseTest) {
+  TensorFactory<ScalarType::Int> tf;
+  TensorFactory<ScalarType::Bool> tf_bool;
+
+  // Test case: (6, 1) and (1, 10) -> (6, 10) (reverse of the first broadcast
+  // test)
+  Tensor a = tf.make(/*sizes=*/{6, 1}, /*data=*/{2, 4, 6, 8, 10, 12});
+  Tensor b = tf.make({1, 10}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10});
+
+  Tensor out = tf_bool.zeros({6, 10});
+
+  op_le_tensor_out(a, b, out);
+
+  // Expected: each row i should be [a[i]<=1, a[i]<=2, ..., a[i]<=10]
+  using ctype =
+      executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper<
+          ScalarType::Bool>::ctype;
+  std::vector<ctype> expected_data = {
+      // Row 0 (a=2): 2<=1, 2<=2, 2<=3, 2<=4, 2<=5, 2<=6, 2<=7, 2<=8, 2<=9, 2<=10
+      false, true, true, true, true, true, true, true, true, true,
+      // Row 1 (a=4): 4<=1, 4<=2, 4<=3, 4<=4, 4<=5, 4<=6, 4<=7, 4<=8, 4<=9, 4<=10
+      false, false, false, true, true, true, true, true, true, true,
+      // Row 2 (a=6): 6<=1, 6<=2, 6<=3, 6<=4, 6<=5, 6<=6, 6<=7, 6<=8, 6<=9, 6<=10
+      false, false, false, false, false, true, true, true, true, true,
+      // Row 3 (a=8): 8<=1, 8<=2, 8<=3, 8<=4, 8<=5, 8<=6, 8<=7, 8<=8, 8<=9, 8<=10
+      false, false, false, false, false, false, false, true, true, true,
+      // Row 4 (a=10): 10<=1, 10<=2, 10<=3, 10<=4, 10<=5, 10<=6, 10<=7, 10<=8, 10<=9, 10<=10
+      false, false, false, false, false, false, false, false, false, true,
+      // Row 5 (a=12): 12<=1, 12<=2, 12<=3, 12<=4, 12<=5, 12<=6, 12<=7, 12<=8, 12<=9, 12<=10
+      false, false, false, false, false, false, false, false, false, false};
+
+  EXPECT_TENSOR_EQ(out, tf_bool.make({6, 10}, expected_data));
+}
+
+TEST_F(OpLeTensorOutTest, BroadcastLastDimTest) {
+  TensorFactory<ScalarType::Int> tf;
+  TensorFactory<ScalarType::Bool> tf_bool;
+
+  // Test case for kBroadcastLastDim: (3, 4, 1) and (3, 4, 5) -> (3, 4, 5)
+  Tensor a = tf.make(
+      /*sizes=*/{3, 4, 1}, /*data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
+  Tensor b = tf.make(
+      {3, 4, 5},
+      {
+          // First 4x5 slice
+          1, 2, 3, 4, 5, // row 0
+          2, 3, 4, 5, 6, // row 1
+          3, 4, 5, 6, 7, // row 2
+          4, 5, 6, 7, 8, // row 3
+          // Second 4x5 slice
+          5, 6, 7, 8, 9, // row 0
+          6, 7, 8, 9, 10, // row 1
+          7, 8, 9, 10, 11, // row 2
+          8, 9, 10, 11, 12, // row 3
+          // Third 4x5 slice
+          9, 10, 11, 12, 13, // row 0
+          10, 11, 12, 13, 14, // row 1
+          11, 12, 13, 14, 15, // row 2
+          12, 13, 14, 15, 16 // row 3
+      });
+
+  Tensor out = tf_bool.zeros({3, 4, 5});
+
+  op_le_tensor_out(a, b, out);
+
+  using ctype =
+      executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper<
+          ScalarType::Bool>::ctype;
+  std::vector<ctype> expected_data = {
+      // First slice: a values are 1,2,3,4
+      true, true, true, true, true, // 1 <= [1,2,3,4,5]
+      true, true, true, true, true, // 2 <= [2,3,4,5,6]
+      true, true, true, true, true, // 3 <= [3,4,5,6,7]
+      true, true, true, true, true, // 4 <= [4,5,6,7,8]
+      // Second slice: a values are 5,6,7,8
+      true, true, true, true, true, // 5 <= [5,6,7,8,9]
+      true, true, true, true, true, // 6 <= [6,7,8,9,10]
+      true, true, true, true, true, // 7 <= [7,8,9,10,11]
+      true, true, true, true, true, // 8 <= [8,9,10,11,12]
+      // Third slice: a values are 9,10,11,12
+      true, true, true, true, true, // 9 <= [9,10,11,12,13]
+      true, true, true, true, true, // 10 <= [10,11,12,13,14]
+      true, true, true, true, true, // 11 <= [11,12,13,14,15]
+      true, true, true, true, true // 12 <= [12,13,14,15,16]
+  };
+
+  EXPECT_TENSOR_EQ(out, tf_bool.make({3, 4, 5}, expected_data));
+}
+
+TEST_F(OpLeTensorOutTest, BroadcastLastDimReverseTest) {
+  TensorFactory<ScalarType::Int> tf;
+  TensorFactory<ScalarType::Bool> tf_bool;
+
+  // Test case for kBroadcastLastDimReverseArguments: (3, 4, 5) and (3, 4, 1) ->
+  // (3, 4, 5)
+  Tensor a = tf.make(
+      {3, 4, 5},
+      {
+          // First 4x5 slice
+          1, 2, 3, 4, 5, // row 0
+          2, 3, 4, 5, 6, // row 1
+          3, 4,
+          5, 6, 7, // row 2
+          4, 5, 6, 7, 8, // row 3
+          // Second 4x5 slice
+          5, 6, 7, 8, 9, // row 0
+          6, 7, 8, 9, 10, // row 1
+          7, 8, 9, 10, 11, // row 2
+          8, 9, 10, 11, 12, // row 3
+          // Third 4x5 slice
+          9, 10, 11, 12, 13, // row 0
+          10, 11, 12, 13, 14, // row 1
+          11, 12, 13, 14, 15, // row 2
+          12, 13, 14, 15, 16 // row 3
+      });
+  Tensor b = tf.make(
+      /*sizes=*/{3, 4, 1},
+      /*data=*/{5, 5, 5, 5, 10, 10, 10, 10, 15, 15, 15, 15});
+
+  Tensor out = tf_bool.zeros({3, 4, 5});
+
+  op_le_tensor_out(a, b, out);
+
+  using ctype =
+      executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper<
+          ScalarType::Bool>::ctype;
+  std::vector<ctype> expected_data = {
+      // First slice: b values are all 5
+      true, true, true, true, true, // [1,2,3,4,5] <= 5
+      true, true, true, true, false, // [2,3,4,5,6] <= 5
+      true, true, true, false, false, // [3,4,5,6,7] <= 5
+      true, true, false, false, false, // [4,5,6,7,8] <= 5
+      // Second slice: b values are all 10
+      true, true, true, true, true, // [5,6,7,8,9] <= 10
+      true, true, true, true, true, // [6,7,8,9,10] <= 10
+      true, true, true, true, false, // [7,8,9,10,11] <= 10
+      true, true, true, false, false, // [8,9,10,11,12] <= 10
+      // Third slice: b values are all 15
+      true, true, true, true, true, // [9,10,11,12,13] <= 15
+      true, true, true, true, true, // [10,11,12,13,14] <= 15
+      true, true, true, true, true, // [11,12,13,14,15] <= 15
+      true, true, true, true, false // [12,13,14,15,16] <= 15
+  };
+
+  EXPECT_TENSOR_EQ(out, tf_bool.make({3, 4, 5}, expected_data));
+}
+
+TEST_F(OpLeTensorOutTest, BroadcastNdByNdTest) {
+  TensorFactory<ScalarType::Int> tf;
+  TensorFactory<ScalarType::Bool> tf_bool;
+
+  // Test case for kBroadcastNdByNd: (2, 1, 4) and (2, 3, 4) -> (2, 3, 4)
+  Tensor a = tf.make(/*sizes=*/{2, 1, 4}, /*data=*/{1, 2, 3, 4, 5, 6, 7, 8});
+  Tensor b = tf.make(
+      {2, 3, 4},
+      {
+          // First 3x4 slice
+          1, 2, 3, 4, // row 0
+          2, 3, 4, 5, // row 1
+          3, 4, 5, 6, // row 2
+          // Second 3x4 slice
+          5, 6, 7, 8, // row 0
+          6, 7, 8, 9, // row 1
+          7, 8, 9, 10 // row 2
+      });
+
+  Tensor out = tf_bool.zeros({2, 3, 4});
+
+  op_le_tensor_out(a, b, out);
+
+  using ctype =
+      executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper<
+          ScalarType::Bool>::ctype;
+  std::vector<ctype> expected_data = {
+      // First slice: a[0,0,:] = [1,2,3,4]
+      true, true, true, true, // [1,2,3,4] <= [1,2,3,4]
+      true, true, true, true, // [1,2,3,4] <= [2,3,4,5]
+      true, true, true, true, // [1,2,3,4] <= [3,4,5,6]
+      // Second slice: a[1,0,:] = [5,6,7,8]
+      true, true, true, true, // [5,6,7,8] <= [5,6,7,8]
+      true, true, true, true, // [5,6,7,8] <= [6,7,8,9]
+      true, true, true, true // [5,6,7,8] <= [7,8,9,10]
+  };
+
+  EXPECT_TENSOR_EQ(out, tf_bool.make({2, 3, 4}, expected_data));
+}
+
+TEST_F(OpLeTensorOutTest, BroadcastNdByNdReverseTest) {
+  TensorFactory<ScalarType::Int> tf;
+  TensorFactory<ScalarType::Bool> tf_bool;
+
+  // Test case for kBroadcastNdByNdReverseArguments: (2, 3, 4) and (2, 1, 4) ->
+  // (2, 3, 4)
+  Tensor a = tf.make(
+      {2, 3, 4},
+      {
+          // First 3x4 slice
+          1, 2, 3, 4, // row 0
+          2, 3, 4, 5, // row 1
+          3, 4, 5, 6, // row 2
+          // Second 3x4 slice
+          5, 6, 7, 8, // row 0
+          6, 7, 8, 9, // row 1
+          7, 8, 9, 10 // row 2
+      });
+  Tensor b = tf.make(/*sizes=*/{2, 1, 4}, /*data=*/{2, 3, 4, 5, 6, 7, 8, 9});
+
+  Tensor out = tf_bool.zeros({2, 3, 4});
+
+  op_le_tensor_out(a, b, out);
+
+  using ctype =
+      executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper<
+          ScalarType::Bool>::ctype;
+  std::vector<ctype> expected_data = {
+      // First slice: b[0,0,:] = [2,3,4,5]
+      true, true, true, true, // [1,2,3,4] <= [2,3,4,5]
+      true, true, true, true, // [2,3,4,5] <= [2,3,4,5]
+      false, false, false, false, // [3,4,5,6] <= [2,3,4,5]
+      // Second slice: b[1,0,:] = [6,7,8,9]
+      true, true, true, true, // [5,6,7,8] <= [6,7,8,9]
+      true, true, true, true, // [6,7,8,9] <= [6,7,8,9]
+      false, false, false, false // [7,8,9,10] <= [6,7,8,9]
+  };
+
+  EXPECT_TENSOR_EQ(out, tf_bool.make({2, 3, 4}, expected_data));
+}
+
+TEST_F(OpLeTensorOutTest, Broadcast2dBy1dTest) {
+  TensorFactory<ScalarType::Int> tf;
+  TensorFactory<ScalarType::Bool> tf_bool;
+
+  // Test case for kBroadcast2dBy1d: (3, 4) and (4,) -> (3, 4)
+  Tensor a = tf.make(
+      /*sizes=*/{3, 4}, /*data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
+  Tensor b = tf.make({4}, {2, 4, 6, 8});
+
+  Tensor out = tf_bool.zeros({3, 4});
+
+  op_le_tensor_out(a, b, out);
+
+  using ctype =
+      executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper<
+          ScalarType::Bool>::ctype;
+  std::vector<ctype> expected_data = {
+      true, true, true, true, // [1,2,3,4] <= [2,4,6,8]
+      false, false, false, true, // [5,6,7,8] <= [2,4,6,8]
+      false, false, false, false // [9,10,11,12] <= [2,4,6,8]
+  };
+
+  EXPECT_TENSOR_EQ(out, tf_bool.make({3, 4}, expected_data));
+}
+
+TEST_F(OpLeTensorOutTest, Broadcast1DTo2DShapeTest) {
+  TensorFactory<ScalarType::Int> tf;
+  TensorFactory<ScalarType::Bool> tf_bool;
+
+  // Test case: (6,) and (1, 6) -> (1, 6)
+  Tensor a = tf.make({6}, {1, 3, 5, 7, 9, 11});
+  Tensor b = tf.make({1, 6}, {2, 4, 6, 8, 10, 12});
+
+  Tensor out = tf_bool.zeros({1, 6});
+
+  op_le_tensor_out(a, b, out);
+
+  // Expected: a[i] <= b[0,i] for all i
+  // [1, 3, 5, 7, 9, 11] <= [2, 4, 6, 8, 10, 12]
+  using ctype =
+      executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper<
+          ScalarType::Bool>::ctype;
+  std::vector<ctype> expected_data = {
+      true, // 1 <= 2
+      true, // 3 <= 4
+      true, // 5 <= 6
+      true, // 7 <= 8
+      true, // 9 <= 10
+      true // 11 <= 12
+  };
+
+  EXPECT_TENSOR_EQ(out, tf_bool.make({1, 6}, expected_data));
+}
+
+TEST_F(OpLeTensorOutTest, Broadcast2DBy1DShapeTest) {
+  TensorFactory<ScalarType::Int> tf;
+  TensorFactory<ScalarType::Bool> tf_bool;
+
+  // Test case: (10,) and (6, 1) -> (6, 10)
+  Tensor a = tf.make({10}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10});
+  Tensor b = tf.make({6, 1}, {2, 4, 6, 8, 10, 12});
+
+  Tensor out = tf_bool.zeros({6, 10});
+
+  op_le_tensor_out(a, b, out);
+
+  // Expected: a[j] <= b[i,0] for all i,j
+  // Each row i should be [a[0]<=b[i,0], a[1]<=b[i,0], ..., a[9]<=b[i,0]]
+  using ctype =
+      executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper<
+          ScalarType::Bool>::ctype;
+  std::vector<ctype> expected_data = {
+      // Row 0 (b=2): [1,2,3,4,5,6,7,8,9,10] <= 2
+      true, true, false, false, false, false, false, false, false, false,
+      // Row 1 (b=4): [1,2,3,4,5,6,7,8,9,10] <= 4
+      true, true, true, true, false, false, false, false, false, false,
+      // Row 2 (b=6): [1,2,3,4,5,6,7,8,9,10] <= 6
+      true, true, true, true, true, true, false, false, false, false,
+      // Row 3 (b=8): [1,2,3,4,5,6,7,8,9,10] <= 8
+      true, true, true, true, true, true, true, true, false, false,
+      // Row 4 (b=10): [1,2,3,4,5,6,7,8,9,10] <= 10
+      true, true, true, true, true, true, true, true, true, true,
+      // Row 5 (b=12): [1,2,3,4,5,6,7,8,9,10] <= 12
+      true, true, true, true, true, true, true, true, true, true};
+
+  EXPECT_TENSOR_EQ(out,
+      tf_bool.make({6, 10}, expected_data));
+}
+
+TEST_F(OpLeTensorOutTest, Broadcast2dBy1dReverseTest) {
+  TensorFactory<ScalarType::Int> tf;
+  TensorFactory<ScalarType::Bool> tf_bool;
+
+  // Test case for kBroadcast2dBy1dReverseArguments: (4,) and (3, 4) -> (3, 4)
+  Tensor a = tf.make({4}, {2, 4, 6, 8});
+  Tensor b = tf.make(
+      /*sizes=*/{3, 4}, /*data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
+
+  Tensor out = tf_bool.zeros({3, 4});
+
+  op_le_tensor_out(a, b, out);
+
+  using ctype =
+      executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper<
+          ScalarType::Bool>::ctype;
+  std::vector<ctype> expected_data = {
+      false, false, false, false, // [2,4,6,8] <= [1,2,3,4]
+      true, true, true, true, // [2,4,6,8] <= [5,6,7,8]
+      true, true, true, true // [2,4,6,8] <= [9,10,11,12]
+  };
+
+  EXPECT_TENSOR_EQ(out, tf_bool.make({3, 4}, expected_data));
+}
diff --git a/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl b/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl
index a95c3f6b368..4b49e966b9b 100644
--- a/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl
+++ b/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl
@@ -218,8 +218,11 @@ OPTIMIZED_ATEN_OPS = (
     op_target(
         name = "op_le",
         deps = [
+            ":binary_ops",
             "//executorch/kernels/portable/cpu:scalar_utils",
             "//executorch/kernels/portable/cpu/util:broadcast_util",
+            "//executorch/kernels/portable/cpu/pattern:comparison_op",
+            "//executorch/kernels/portable/cpu/util:elementwise_util",
             "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch",
         ],
     ),
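
For reference, the expected_data tables above can be cross-checked with plain loops. The following standalone sketch is illustrative only (it does not use the ExecuTorch API or TensorFactory); it recomputes the (1, 10) <= (6, 1) broadcast from Broadcast2DTest and prints the resulting 6x10 boolean grid.

// Standalone sketch of the broadcast semantics exercised by Broadcast2DTest.
// Illustrative only; out[i][j] = a[j] <= b[i], matching out = le(a, b) for
// a of shape (1, 10) and b of shape (6, 1).
#include <cstdio>
#include <vector>

int main() {
  const std::vector<int> a = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; // shape (1, 10)
  const std::vector<int> b = {2, 4, 6, 8, 10, 12};            // shape (6, 1)
  std::vector<bool> out(6 * 10);                              // shape (6, 10)

  // Broadcast: row index comes from b, column index comes from a.
  for (int i = 0; i < 6; ++i) {
    for (int j = 0; j < 10; ++j) {
      out[i * 10 + j] = a[j] <= b[i];
    }
  }

  // Print one row of the output per line, in the same order as expected_data.
  for (int i = 0; i < 6; ++i) {
    for (int j = 0; j < 10; ++j) {
      std::printf("%s%s", out[i * 10 + j] ? "true" : "false",
                  j == 9 ? "\n" : ", ");
    }
  }
  return 0;
}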