From 19572adeb56be2ae0aa16b725a8e9a45c9743888 Mon Sep 17 00:00:00 2001
From: Hazem Zaghloul
Date: Tue, 26 Aug 2025 14:38:00 +0100
Subject: [PATCH 1/2] Add new wrapper value for removing duplicates from array values

This adds a new value, ConstantArrayDistinctValue, which can be used to
wrap an underlying constant array value to remove duplicates from the
underlying array during evaluation. The purpose of this new value is to
use it to wrap in-lists used in InJoin, InUnion and FlatMap plans to
avoid duplicate records if the in-list contains duplicate values.
---
 .../values/ConstantArrayDistinctValue.java | 198 ++++++++++++++++++
 .../src/main/proto/record_query_plan.proto | 5 +
 .../ConstantArrayDistinctValueTest.java | 113 ++++++++++
 3 files changed, 316 insertions(+)
 create mode 100644 fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/query/plan/cascades/values/ConstantArrayDistinctValue.java
 create mode 100644 fdb-record-layer-core/src/test/java/com/apple/foundationdb/record/query/plan/cascades/values/ConstantArrayDistinctValueTest.java

diff --git a/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/query/plan/cascades/values/ConstantArrayDistinctValue.java b/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/query/plan/cascades/values/ConstantArrayDistinctValue.java
new file mode 100644
index 0000000000..33f1f0efeb
--- /dev/null
+++ b/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/query/plan/cascades/values/ConstantArrayDistinctValue.java
@@ -0,0 +1,198 @@
+/*
+ * ConstantArrayDistinctValue.java
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2015-2025 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.apple.foundationdb.record.query.plan.cascades.values;
+
+import com.apple.foundationdb.annotation.API;
+import com.apple.foundationdb.annotation.SpotBugsSuppressWarnings;
+import com.apple.foundationdb.record.EvaluationContext;
+import com.apple.foundationdb.record.ObjectPlanHash;
+import com.apple.foundationdb.record.PlanDeserializer;
+import com.apple.foundationdb.record.PlanHashable;
+import com.apple.foundationdb.record.PlanSerializationContext;
+import com.apple.foundationdb.record.planprotos.PConstantArrayDistinctValue;
+import com.apple.foundationdb.record.planprotos.PValue;
+import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStoreBase;
+import com.apple.foundationdb.record.query.plan.cascades.AliasMap;
+import com.apple.foundationdb.record.query.plan.cascades.BuiltInFunction;
+import com.apple.foundationdb.record.query.plan.cascades.SemanticException;
+import com.apple.foundationdb.record.query.plan.cascades.typing.Type;
+import com.apple.foundationdb.record.query.plan.cascades.typing.Typed;
+import com.apple.foundationdb.record.query.plan.explain.ExplainTokens;
+import com.apple.foundationdb.record.query.plan.explain.ExplainTokensWithPrecedence;
+import com.google.auto.service.AutoService;
+import com.google.common.base.Verify;
+import com.google.common.collect.ImmutableList;
+import com.google.protobuf.Message;
+
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+import java.util.List;
+import java.util.Objects;
+import java.util.function.Supplier;
+
+
+/**
+ * A value that returns the array of the {@link Value} that is passed in with all duplicate elements removed.
+ * This value only supports underlying constant values that result in an array type, see {@link Value#isConstant()}.
+ */
+@API(API.Status.EXPERIMENTAL)
+public class ConstantArrayDistinctValue extends AbstractValue implements ValueWithChild {
+    private static final ObjectPlanHash BASE_HASH = new ObjectPlanHash("Constant-Array-Distinct-Value");
+
+    @Nonnull
+    private final Value childValue;
+    @Nonnull
+    private final Type resultType;
+
+    public ConstantArrayDistinctValue(@Nonnull final Value childValue) {
+        Verify.verify(childValue.isConstant()); // only constant children are supported
+        final var innerResultType = Objects.requireNonNull(childValue.getResultType());
+        Verify.verify(innerResultType.isArray()); // ... and the child must produce an array
+        this.childValue = childValue;
+        this.resultType = innerResultType; // the result has the same type as the wrapped array
+    }
+
+    @Nonnull
+    @Override
+    public List<? extends Value> computeChildren() {
+        return ImmutableList.of(childValue);
+    }
+
+    @Nonnull
+    @Override
+    public Value getChild() {
+        return childValue;
+    }
+
+    @Nonnull
+    @Override
+    public ValueWithChild withNewChild(@Nonnull final Value rebasedChild) {
+        return new ConstantArrayDistinctValue(rebasedChild);
+    }
+
+
+    @Nonnull
+    @Override
+    public Type getResultType() {
+        return resultType;
+    }
+
+    @Override
+    public <M extends Message> Object eval(@Nullable final FDBRecordStoreBase<M> store, @Nonnull final EvaluationContext context) {
+        final var childResult = childValue.eval(store, context);
+        if (childResult == null) { // a null array stays null
+            return null;
+        }
+        return ((List<?>)childResult).stream().distinct().collect(ImmutableList.toImmutableList());
+    }
+
+    @Override
+    public int hashCodeWithoutChildren() {
+        return PlanHashable.objectsPlanHash(PlanHashable.CURRENT_FOR_CONTINUATION, BASE_HASH);
+    }
+
+    @Override
+    public int planHash(@Nonnull final PlanHashMode mode) {
+        return PlanHashable.objectsPlanHash(mode, BASE_HASH, childValue);
+    }
+
+    @Nonnull
+    @Override
+    public ExplainTokensWithPrecedence explain(@Nonnull final Iterable<Supplier<ExplainTokensWithPrecedence>> explainSuppliers) {
+        return ExplainTokensWithPrecedence.of(new ExplainTokens().addFunctionCall("constantArrayDistinct",
+                Value.explainFunctionArguments(explainSuppliers)));
+    }
+
+    @Override
+    public int hashCode() {
+        return semanticHashCode();
+    }
+
+    @SuppressWarnings("EqualsWhichDoesntCheckParameterClass")
+    @SpotBugsSuppressWarnings("EQ_UNUSUAL")
+    @Override
+    public boolean equals(final Object other) {
+        return semanticEquals(other, AliasMap.emptyMap());
+    }
+
+    @Nonnull
+    private static Value encapsulateInternal(@Nonnull final List<? extends Typed> typedArgs) {
+        Verify.verify(typedArgs.size() == 1); // exactly one argument: the array to de-duplicate
+        final var arg0 = typedArgs.get(0);
+        SemanticException.check(
+                arg0 instanceof Value && arg0.getResultType().isArray() && ((Value)arg0).isConstant(),
+                SemanticException.ErrorCode.FUNCTION_UNDEFINED_FOR_GIVEN_ARGUMENT_TYPES
+        );
+        return new ConstantArrayDistinctValue((Value)arg0);
+    }
+
+    @Nonnull
+    @Override
+    public PConstantArrayDistinctValue toProto(@Nonnull final PlanSerializationContext serializationContext) {
+        return PConstantArrayDistinctValue.newBuilder()
+                .setChildValue(childValue.toValueProto(serializationContext))
+                .build();
+    }
+
+    @Nonnull
+    @Override
+    public PValue toValueProto(@Nonnull PlanSerializationContext serializationContext) {
+        return PValue.newBuilder().setConstantArrayDistinctValue(toProto(serializationContext)).build();
+    }
+
+    @Nonnull
+    public static ConstantArrayDistinctValue fromProto(@Nonnull final PlanSerializationContext serializationContext,
+                                                       @Nonnull final PConstantArrayDistinctValue constantArrayDistinctValueProto) {
+        return new ConstantArrayDistinctValue(
+                Value.fromValueProto(serializationContext, Objects.requireNonNull(constantArrayDistinctValueProto.getChildValue()))
+        );
+    }
+
+    /**
+     * Deserializer for {@link PConstantArrayDistinctValue} messages.
+     */
+    @AutoService(PlanDeserializer.class)
+    public static class Deserializer implements PlanDeserializer<PConstantArrayDistinctValue, ConstantArrayDistinctValue> {
+        @Nonnull
+        @Override
+        public Class<PConstantArrayDistinctValue> getProtoMessageClass() {
+            return PConstantArrayDistinctValue.class;
+        }
+
+        @Nonnull
+        @Override
+        public ConstantArrayDistinctValue fromProto(@Nonnull final PlanSerializationContext serializationContext,
+                                                    @Nonnull final PConstantArrayDistinctValue constantArrayDistinctValueProto) {
+            return ConstantArrayDistinctValue.fromProto(serializationContext, constantArrayDistinctValueProto);
+        }
+    }
+
+    /**
+     * The {@code constant_array_distinct} function.
+     */
+    @AutoService(BuiltInFunction.class)
+    public static class ConstantArrayDistinctFn extends BuiltInFunction<Value> {
+        public ConstantArrayDistinctFn() {
+            super("constant_array_distinct",
+                    ImmutableList.of(), new Type.Array(), (builtInFunction, typedArgs) -> encapsulateInternal(typedArgs));
+        }
+    }
+}
diff --git a/fdb-record-layer-core/src/main/proto/record_query_plan.proto b/fdb-record-layer-core/src/main/proto/record_query_plan.proto
index ed3dce7683..c6b16a744c 100644
--- a/fdb-record-layer-core/src/main/proto/record_query_plan.proto
+++ b/fdb-record-layer-core/src/main/proto/record_query_plan.proto
@@ -254,6 +254,7 @@ message PValue {
     PRangeValue range_value = 48;
     PFirstOrDefaultStreamingValue first_or_default_streaming_value = 49;
     PEvaluatesToValue evaluates_to_value = 50;
+    PConstantArrayDistinctValue constant_array_distinct_value = 51;
   }
 }
 
@@ -1259,6 +1260,10 @@ message PRangeValue {
   optional PValue step_child = 3;
 }
 
+message PConstantArrayDistinctValue {
+  optional PValue child_value = 1;
+}
+
 //
 // Comparisons
 //
diff --git a/fdb-record-layer-core/src/test/java/com/apple/foundationdb/record/query/plan/cascades/values/ConstantArrayDistinctValueTest.java b/fdb-record-layer-core/src/test/java/com/apple/foundationdb/record/query/plan/cascades/values/ConstantArrayDistinctValueTest.java
new file mode 100644
index 0000000000..5b9ffb1cb6
--- /dev/null
+++ 
b/fdb-record-layer-core/src/test/java/com/apple/foundationdb/record/query/plan/cascades/values/ConstantArrayDistinctValueTest.java
@@ -0,0 +1,113 @@
+/*
+ * ConstantArrayDistinctValueTest.java
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2015-2025 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.apple.foundationdb.record.query.plan.cascades.values;
+
+import com.apple.foundationdb.record.EvaluationContext;
+import com.apple.foundationdb.record.query.plan.cascades.CorrelationIdentifier;
+import com.apple.foundationdb.record.query.plan.cascades.Quantifier;
+import com.apple.foundationdb.record.query.plan.cascades.typing.Type;
+import com.google.common.base.VerifyException;
+import com.google.common.collect.ImmutableList;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.util.List;
+import java.util.stream.Stream;
+
+import static org.junit.jupiter.params.provider.Arguments.arguments;
+
+class ConstantArrayDistinctValueTest {
+
+    @Test
+    void rejectsNonConstantValues() {
+        Assertions.assertThrowsExactly(VerifyException.class, () -> {
+            new ConstantArrayDistinctValue(
+                    QuantifiedObjectValue.of(CorrelationIdentifier.uniqueID(), new Type.Array())
+            );
+        });
+    }
+
+    @Test
+    void rejectsNonArrayValues() {
+        Assertions.assertThrowsExactly(VerifyException.class, () -> {
+            new ConstantArrayDistinctValue(LiteralValue.ofScalar(42));
+        });
+    }
+
+    static Stream<Arguments> arraysSource() { // pairs of (input array, expected distinct array)
+        return Stream.of(
+                arguments(ImmutableList.of(1, 2, 1, 2, 1, 2, 3), ImmutableList.of(1, 2, 3)),
+                arguments(ImmutableList.of(1, 2, 3, 4, 5), ImmutableList.of(1, 2, 3, 4, 5)),
+                arguments(
+                        ImmutableList.of("val2", "val1", "val3", "val1", "val2"),
+                        ImmutableList.of("val2", "val1", "val3")
+                )
+        );
+    }
+
+    @ParameterizedTest(name = "returnsArrayWithoutDuplicates[input={0}, expected={1}]")
+    @MethodSource("arraysSource")
+    void returnsArrayWithoutDuplicates(List<?> inputArray, List<?> expectedArray) {
+        final var literalValue = LiteralValue.ofList(inputArray);
+
+        final var constantArrayDistinctValue = new ConstantArrayDistinctValue(literalValue);
+        final var actualArray = constantArrayDistinctValue.evalWithoutStore(EvaluationContext.EMPTY);
+
+        Assertions.assertEquals(expectedArray, actualArray);
+    }
+
+    @ParameterizedTest(name = "builtInFunctionReturnsArrayWithoutDuplicates[input={0}, expected={1}]")
+    @MethodSource("arraysSource")
+    void builtInFunctionReturnsArrayWithoutDuplicates(List<?> inputArray, List<?> expectedArray) {
+        final var fn = new ConstantArrayDistinctValue.ConstantArrayDistinctFn();
+
+        final var retValue = fn.encapsulate(List.of(LiteralValue.ofList(inputArray)));
+
+        Assertions.assertInstanceOf(ConstantArrayDistinctValue.class, retValue);
+        Assertions.assertEquals(expectedArray, ((Value)retValue).evalWithoutStore(EvaluationContext.EMPTY));
+    }
+
+    @Test
+    void withNewChildReplacesUnderlyingArray() {
+        final var expectedArray = ImmutableList.of(1, 2, 3);
+        final ConstantArrayDistinctValue value = new ConstantArrayDistinctValue(LiteralValue.ofList(ImmutableList.of(4, 5, 6)));
+
+        final var newValue = value.withNewChild(LiteralValue.ofList(expectedArray));
+
+        Assertions.assertEquals(expectedArray, newValue.evalWithoutStore(EvaluationContext.EMPTY));
+    }
+
+    @Test
+    void equalsComparesUnderlyingValues() {
+        final var val1 = new ConstantArrayDistinctValue(LiteralValue.ofList(ImmutableList.of(5, 6, 7)));
+        final var val2 = new ConstantArrayDistinctValue(LiteralValue.ofList(ImmutableList.of(5, 6, 7)));
+        final var val3 = new ConstantArrayDistinctValue(
+                ConstantObjectValue.of(Quantifier.constant(), "c0", new Type.Array())
+        );
+
+        Assertions.assertEquals(val1, val2);
+        Assertions.assertNotEquals(val1, val3);
+        Assertions.assertNotEquals(val2, val3);
+    }
+}
From 781971a9278e066e7aa8ec396fa4e329ec7e3e35 Mon Sep 17 00:00:00 2001
From: Hazem Zaghloul
Date: Tue, 26 Aug 2025 14:51:43 +0100
Subject: [PATCH 2/2] Expose constant_array_distinct function in SQL

---
 .../src/main/antlr/RelationalLexer.g4 | 1 +
 .../src/main/antlr/RelationalParser.g4 | 2 +-
 .../query/functions/SqlFunctionCatalogImpl.java | 1 +
 .../relational/yamltests/Matchers.java | 11 +++++++++++
 yaml-tests/src/test/resources/functions.yamsql | 15 +++++++++++++++
 5 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/fdb-relational-core/src/main/antlr/RelationalLexer.g4 b/fdb-relational-core/src/main/antlr/RelationalLexer.g4
index b729d6b5b5..6fc0b19445 100644
--- a/fdb-relational-core/src/main/antlr/RelationalLexer.g4
+++ b/fdb-relational-core/src/main/antlr/RelationalLexer.g4
@@ -373,6 +373,7 @@ VARIANCE: 'VARIANCE';
 CURRENT_DATE: 'CURRENT_DATE';
 CURRENT_TIME: 'CURRENT_TIME';
 CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP';
+CONSTANT_ARRAY_DISTINCT: 'CONSTANT_ARRAY_DISTINCT';
 LOCALTIME: 'LOCALTIME';
 CURDATE: 'CURDATE';
 CURTIME: 'CURTIME';
diff --git a/fdb-relational-core/src/main/antlr/RelationalParser.g4 b/fdb-relational-core/src/main/antlr/RelationalParser.g4
index 89e9cc6fb5..7053c88b7a 100644
--- a/fdb-relational-core/src/main/antlr/RelationalParser.g4
+++ b/fdb-relational-core/src/main/antlr/RelationalParser.g4
@@ -1289,7 +1289,7 @@ functionNameBase
     | BUFFER | CEIL | CEILING | CENTROID | 
CHARACTER_LENGTH | CHARSET | CHAR_LENGTH | COERCIBILITY | COLLATION | COMPRESS | COALESCE | CONCAT | CONCAT_WS | CONNECTION_ID | CONV - | CONVERT_TZ | COS | COT | CRC32 + | CONVERT_TZ | COS | COT | CRC32 | CONSTANT_ARRAY_DISTINCT | CREATE_ASYMMETRIC_PRIV_KEY | CREATE_ASYMMETRIC_PUB_KEY | CREATE_DH_PARAMETERS | CREATE_DIGEST | CROSSES | CUME_DIST | DATABASE | DATE | DATEDIFF | DATE_FORMAT | DAY | DAYNAME | DAYOFMONTH diff --git a/fdb-relational-core/src/main/java/com/apple/foundationdb/relational/recordlayer/query/functions/SqlFunctionCatalogImpl.java b/fdb-relational-core/src/main/java/com/apple/foundationdb/relational/recordlayer/query/functions/SqlFunctionCatalogImpl.java index f409f542c7..7aeaf3e393 100644 --- a/fdb-relational-core/src/main/java/com/apple/foundationdb/relational/recordlayer/query/functions/SqlFunctionCatalogImpl.java +++ b/fdb-relational-core/src/main/java/com/apple/foundationdb/relational/recordlayer/query/functions/SqlFunctionCatalogImpl.java @@ -149,6 +149,7 @@ private static ImmutableMap BuiltInFunctionCatalog.resolve("patternForLike", argumentsCount)) .put("__internal_array", argumentsCount -> BuiltInFunctionCatalog.resolve("array", argumentsCount)) .put("__pick_value", argumentsCount -> BuiltInFunctionCatalog.resolve("pick", argumentsCount)) + .put("constant_array_distinct", argumentsCount -> BuiltInFunctionCatalog.resolve("constant_array_distinct", argumentsCount)) .build(); } diff --git a/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/Matchers.java b/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/Matchers.java index 4bc08f2661..3c1130a0fc 100644 --- a/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/Matchers.java +++ b/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/Matchers.java @@ -613,6 +613,17 @@ private static ResultSetMatchResult matchField(@Nullable final Object expected, } } } + if (actualArrayContent.next()) { + return 
ResultSetMatchResult.fail(String.format( + Locale.ROOT, + "cell mismatch at row: %d cellRef: %s%n expected 🟢contains less items than 🟡.%n🟢 %s%n🟡 %s", + rowNumber, + cellRef, + expected, + actual + )); + } + return ResultSetMatchResult.success(); } diff --git a/yaml-tests/src/test/resources/functions.yamsql b/yaml-tests/src/test/resources/functions.yamsql index 20834dc437..ba0cbdb304 100644 --- a/yaml-tests/src/test/resources/functions.yamsql +++ b/yaml-tests/src/test/resources/functions.yamsql @@ -148,6 +148,21 @@ test_block: - query: update C set st = coalesce(st, (5, 'e', 5.0)) where c1 = 4 returning "new".st - unorderedResult: [ {{ T1: 5, A: 'e', B: 5.0}}] +--- +test_block: + preset: single_repetition_ordered + options: + supported_version: !current_version + tests: + - + - query: select constant_array_distinct([1, 1, 3, 3, 2, 5]) as arr, a1 from A + - result: [{ a1: 1, arr: [1, 3, 2, 5] }] + - + - query: select constant_array_distinct([1+1, 2*1, 3*0, 99*0, 5]) as arr, a1 from A + - result: [{ a1: 1, arr: [2, 0, 5] }] + - + - query: select constant_array_distinct([a2, 2*1, 3*0, 99*0, 5]) as arr, a1 from A + - error: '22F00' ...