Patch summary (derived from the hunks below; text before the first
"diff --git" header is ignored by patch tools):
  * VectorOps.cpp: factor the computation of the linearized insert position
    into a static helper, calculateInsertPosition(), and reuse it in
    foldDenseElementsAttrDestInsertOp.
  * VectorOps.cpp: add the InsertChainFullyInitialized canonicalization
    pattern, which rewrites a chain of static-position vector.insert ops that
    together initialize every element of the result into a single
    vector.from_elements, and register it in
    InsertOp::getCanonicalizationPatterns.
  * Tests: update FileCheck expectations to the new canonical form.

diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
index fcfb401fd9867..1c2f08fcc55e6 100644
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -3149,6 +3149,18 @@ LogicalResult InsertOp::verify() {
   return success();
 }
 
+// Calculate the linearized position of the continuous chunk of elements to
+// insert, based on the shape of the value to insert and the positions to insert
+// at.
+static int64_t calculateInsertPosition(VectorType destTy,
+                                       ArrayRef<int64_t> positions) {
+  llvm::SmallVector<int64_t> completePositions(destTy.getRank(), 0);
+  assert(positions.size() <= completePositions.size() &&
+         "positions size must be less than or equal to destTy rank");
+  copy(positions, completePositions.begin());
+  return linearize(completePositions, computeStrides(destTy.getShape()));
+}
+
 namespace {
 
 // If insertOp is only inserting unit dimensions it can be transformed to a
@@ -3191,6 +3203,126 @@ class InsertSplatToSplat final : public OpRewritePattern<InsertOp> {
   }
 };
 
+/// Pattern to optimize a chain of insertions.
+///
+/// This pattern identifies chains of vector.insert operations that:
+/// 1. Only insert values at static positions.
+/// 2. Completely initialize all elements in the resulting vector.
+/// 3. All intermediate insert operations have only one use.
+///
+/// When these conditions are met, the entire chain can be replaced with a
+/// single vector.from_elements operation.
+///
+/// Example transformation:
+///   %poison = ub.poison : vector<2xi32>
+///   %0 = vector.insert %c1, %poison[0] : i32 into vector<2xi32>
+///   %1 = vector.insert %c2, %0[1] : i32 into vector<2xi32>
+///   ->
+///   %result = vector.from_elements %c1, %c2 : vector<2xi32>
+class InsertChainFullyInitialized final : public OpRewritePattern<InsertOp> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+  LogicalResult matchAndRewrite(InsertOp op,
+                                PatternRewriter &rewriter) const override {
+
+    VectorType destTy = op.getDestVectorType();
+    if (destTy.isScalable())
+      return failure();
+    // Check if the result is used as the dest operand of another vector.insert
+    // Only care about the last op in a chain of insertions.
+    for (Operation *user : op.getResult().getUsers())
+      if (auto insertOp = dyn_cast<InsertOp>(user))
+        if (insertOp.getDest() == op.getResult())
+          return failure();
+
+    InsertOp currentOp = op;
+    SmallVector<InsertOp> chainInsertOps;
+    while (currentOp) {
+      // Dynamic position is not supported.
+      if (currentOp.hasDynamicPosition())
+        return failure();
+
+      chainInsertOps.push_back(currentOp);
+      currentOp = currentOp.getDest().getDefiningOp<InsertOp>();
+      // Check that intermediate inserts have only one use to avoid an explosion
+      // of vectors.
+      if (currentOp && !currentOp->hasOneUse())
+        return failure();
+    }
+
+    int64_t vectorSize = destTy.getNumElements();
+    int64_t initializedCount = 0;
+    SmallVector<bool> initialized(vectorSize, false);
+    SmallVector<int64_t> pendingInsertPos;
+    SmallVector<int64_t> pendingInsertSize;
+    SmallVector<Value> pendingInsertValues;
+
+    for (auto insertOp : chainInsertOps) {
+      // The insert op folder will fold an insert at poison index into a
+      // ub.poison, which truncates the insert chain's backward traversal.
+      if (is_contained(insertOp.getStaticPosition(), InsertOp::kPoisonIndex))
+        return failure();
+
+      // Calculate the linearized position for inserting elements.
+      int64_t insertBeginPosition =
+          calculateInsertPosition(destTy, insertOp.getStaticPosition());
+
+      // The valueToStore operand may be a vector or a scalar. Need to handle
+      // both cases.
+      int64_t insertSize = 1;
+      if (auto srcVectorType =
+              llvm::dyn_cast<VectorType>(insertOp.getValueToStoreType()))
+        insertSize = srcVectorType.getNumElements();
+
+      assert(insertBeginPosition + insertSize <= vectorSize &&
+             "insert would overflow the vector");
+
+      for (auto index : llvm::seq<int64_t>(insertBeginPosition,
+                                           insertBeginPosition + insertSize)) {
+        if (initialized[index])
+          continue;
+        initialized[index] = true;
+        ++initializedCount;
+      }
+
+      // Defer the creation of ops before we can make sure the pattern can
+      // succeed.
+      pendingInsertPos.push_back(insertBeginPosition);
+      pendingInsertSize.push_back(insertSize);
+      pendingInsertValues.push_back(insertOp.getValueToStore());
+
+      if (initializedCount == vectorSize)
+        break;
+    }
+
+    // Final check: all positions must be initialized
+    if (initializedCount != vectorSize)
+      return failure();
+
+    SmallVector<Value> elements(vectorSize);
+    for (auto [insertBeginPosition, insertSize, valueToStore] :
+         llvm::reverse(llvm::zip(pendingInsertPos, pendingInsertSize,
+                                 pendingInsertValues))) {
+      if (auto srcVectorType =
+              llvm::dyn_cast<VectorType>(valueToStore.getType())) {
+        SmallVector<int64_t> strides = computeStrides(srcVectorType.getShape());
+        // Get all elements from the vector in row-major order.
+        for (int64_t linearIdx = 0; linearIdx < insertSize; linearIdx++) {
+          SmallVector<int64_t> position = delinearize(linearIdx, strides);
+          Value extractedElement = rewriter.create<vector::ExtractOp>(
+              op.getLoc(), valueToStore, position);
+          elements[insertBeginPosition + linearIdx] = extractedElement;
+        }
+      } else {
+        elements[insertBeginPosition] = valueToStore;
+      }
+    }
+
+    rewriter.replaceOpWithNewOp<vector::FromElementsOp>(op, destTy, elements);
+    return success();
+  }
+};
+
 } // namespace
 
 static Attribute
@@ -3217,13 +3349,9 @@ foldDenseElementsAttrDestInsertOp(InsertOp insertOp, Attribute srcAttr,
       !insertOp->hasOneUse())
     return {};
 
-  // Calculate the linearized position of the continuous chunk of elements to
-  // insert.
-  llvm::SmallVector<int64_t> completePositions(destTy.getRank(), 0);
-  copy(insertOp.getStaticPosition(), completePositions.begin());
+  // Calculate the linearized position for inserting elements.
   int64_t insertBeginPosition =
-      linearize(completePositions, computeStrides(destTy.getShape()));
-
+      calculateInsertPosition(destTy, insertOp.getStaticPosition());
   SmallVector<Attribute> insertedValues;
   Type destEltType = destTy.getElementType();
 
@@ -3256,7 +3384,8 @@ foldDenseElementsAttrDestInsertOp(InsertOp insertOp, Attribute srcAttr,
 
 void InsertOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                            MLIRContext *context) {
-  results.add<InsertToBroadcast, BroadcastFolder, InsertSplatToSplat>(context);
+  results.add<InsertToBroadcast, BroadcastFolder, InsertSplatToSplat,
+              InsertChainFullyInitialized>(context);
 }
 
 OpFoldResult vector::InsertOp::fold(FoldAdaptor adaptor) {
diff --git a/mlir/test/Conversion/ConvertToSPIRV/vector-unroll.mlir b/mlir/test/Conversion/ConvertToSPIRV/vector-unroll.mlir
index d68ba44ee8840..c85f4334ff2e5 100644
--- a/mlir/test/Conversion/ConvertToSPIRV/vector-unroll.mlir
+++ b/mlir/test/Conversion/ConvertToSPIRV/vector-unroll.mlir
@@ -83,20 +83,16 @@ func.func @vaddi_reduction(%arg0 : vector<8xi32>, %arg1 : vector<8xi32>) -> (i32
 // CHECK-LABEL: @transpose
 // CHECK-SAME: (%[[ARG0:.+]]: vector<3xi32>, %[[ARG1:.+]]: vector<3xi32>)
 func.func @transpose(%arg0 : vector<2x3xi32>) -> (vector<3x2xi32>) {
-  // CHECK: %[[UB:.*]] =
ub.poison : vector<2xi32> // CHECK: %[[EXTRACT0:.*]] = vector.extract %[[ARG0]][0] : i32 from vector<3xi32> - // CHECK: %[[INSERT0:.*]]= vector.insert %[[EXTRACT0]], %[[UB]] [0] : i32 into vector<2xi32> // CHECK: %[[EXTRACT1:.*]] = vector.extract %[[ARG1]][0] : i32 from vector<3xi32> - // CHECK: %[[INSERT1:.*]] = vector.insert %[[EXTRACT1]], %[[INSERT0]][1] : i32 into vector<2xi32> + // CHECK: %[[FROM_ELEMENTS0:.*]] = vector.from_elements %[[EXTRACT0]], %[[EXTRACT1]] : vector<2xi32> // CHECK: %[[EXTRACT2:.*]] = vector.extract %[[ARG0]][1] : i32 from vector<3xi32> - // CHECK: %[[INSERT2:.*]] = vector.insert %[[EXTRACT2]], %[[UB]] [0] : i32 into vector<2xi32> // CHECK: %[[EXTRACT3:.*]] = vector.extract %[[ARG1]][1] : i32 from vector<3xi32> - // CHECK: %[[INSERT3:.*]] = vector.insert %[[EXTRACT3]], %[[INSERT2]] [1] : i32 into vector<2xi32> + // CHECK: %[[FROM_ELEMENTS1:.*]] = vector.from_elements %[[EXTRACT2]], %[[EXTRACT3]] : vector<2xi32> // CHECK: %[[EXTRACT4:.*]] = vector.extract %[[ARG0]][2] : i32 from vector<3xi32> - // CHECK: %[[INSERT4:.*]] = vector.insert %[[EXTRACT4]], %[[UB]] [0] : i32 into vector<2xi32> // CHECK: %[[EXTRACT5:.*]] = vector.extract %[[ARG1]][2] : i32 from vector<3xi32> - // CHECK: %[[INSERT5:.*]] = vector.insert %[[EXTRACT5]], %[[INSERT4]] [1] : i32 into vector<2xi32> - // CHECK: return %[[INSERT1]], %[[INSERT3]], %[[INSERT5]] : vector<2xi32>, vector<2xi32>, vector<2xi32> + // CHECK: %[[FROM_ELEMENTS2:.*]] = vector.from_elements %[[EXTRACT4]], %[[EXTRACT5]] : vector<2xi32> + // CHECK: return %[[FROM_ELEMENTS0]], %[[FROM_ELEMENTS1]], %[[FROM_ELEMENTS2]] : vector<2xi32>, vector<2xi32>, vector<2xi32> %0 = vector.transpose %arg0, [1, 0] : vector<2x3xi32> to vector<3x2xi32> return %0 : vector<3x2xi32> } diff --git a/mlir/test/Conversion/MathToLibm/convert-to-libm.mlir b/mlir/test/Conversion/MathToLibm/convert-to-libm.mlir index 08354dbf280c1..26b54566cb2cd 100644 --- a/mlir/test/Conversion/MathToLibm/convert-to-libm.mlir +++ 
b/mlir/test/Conversion/MathToLibm/convert-to-libm.mlir @@ -79,21 +79,17 @@ func.func @absf_caller(%float: f32, %double: f64) -> (f32, f64) { // CHECK-LABEL: func @absf_vec_caller( // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { -// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> -// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> // CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @fabsf(%[[IN0_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @fabsf(%[[IN1_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> // CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call @fabs(%[[IN0_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @fabs(%[[IN1_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> -// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> +// CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> // CHECK: } func.func @absf_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { %float_result = math.absf %float : vector<2xf32> @@ -116,21 +112,17 @@ func.func 
@acos_caller(%float: f32, %double: f64) -> (f32, f64) { // CHECK-LABEL: func @acos_vec_caller( // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { -// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> -// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> // CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @acosf(%[[IN0_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @acosf(%[[IN1_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> // CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call @acos(%[[IN0_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @acos(%[[IN1_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> -// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> +// CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> // CHECK: } func.func @acos_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { %float_result = math.acos %float : vector<2xf32> @@ -153,21 +145,17 @@ func.func @acosh_caller(%float: f32, %double: f64) -> (f32, f64) { // CHECK-LABEL: func 
@acosh_vec_caller( // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { -// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> -// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> // CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @acoshf(%[[IN0_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @acoshf(%[[IN1_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> // CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call @acosh(%[[IN0_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @acosh(%[[IN1_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> -// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> +// CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> // CHECK: } func.func @acosh_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { %float_result = math.acosh %float : vector<2xf32> @@ -190,21 +178,17 @@ func.func @asin_caller(%float: f32, %double: f64) -> (f32, f64) { // CHECK-LABEL: func @asin_vec_caller( // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, // CHECK-SAME: 
%[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { -// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> -// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> // CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @asinf(%[[IN0_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @asinf(%[[IN1_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> // CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call @asin(%[[IN0_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @asin(%[[IN1_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> -// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> +// CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> // CHECK: } func.func @asin_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { %float_result = math.asin %float : vector<2xf32> @@ -227,21 +211,17 @@ func.func @asinh_caller(%float: f32, %double: f64) -> (f32, f64) { // CHECK-LABEL: func @asinh_vec_caller( // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { -// CHECK-DAG: 
%[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> -// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> // CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @asinhf(%[[IN0_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @asinhf(%[[IN1_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> // CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call @asinh(%[[IN0_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @asinh(%[[IN1_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> -// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> +// CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> // CHECK: } func.func @asinh_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { %float_result = math.asinh %float : vector<2xf32> @@ -274,21 +254,17 @@ func.func @atan_caller(%float: f32, %double: f64, %half: f16, %bfloat: bf16) -> // CHECK-LABEL: func @atan_vec_caller( // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { -// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> 
-// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> // CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @atanf(%[[IN0_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @atanf(%[[IN1_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> // CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call @atan(%[[IN0_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @atan(%[[IN1_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> -// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> +// CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> // CHECK: } func.func @atan_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { %float_result = math.atan %float : vector<2xf32> @@ -321,21 +297,17 @@ func.func @atanh_caller(%float: f32, %double: f64, %half: f16, %bfloat: bf16) -> // CHECK-LABEL: func @atanh_vec_caller( // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { -// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> -// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : 
vector<2xf64> // CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @atanhf(%[[IN0_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @atanhf(%[[IN1_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> // CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call @atanh(%[[IN0_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @atanh(%[[IN1_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> -// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> +// CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> // CHECK: } func.func @atanh_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { %float_result = math.atanh %float : vector<2xf32> @@ -419,23 +391,19 @@ func.func @erf_caller(%float: f32, %double: f64) -> (f32, f64) { // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { func.func @erf_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { - // CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> - // CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : 
vector<2xf64> // CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @erff(%[[IN0_F32]]) : (f32) -> f32 - // CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @erff(%[[IN1_F32]]) : (f32) -> f32 - // CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> + // CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> %float_result = math.erf %float : vector<2xf32> // CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call @erf(%[[IN0_F64]]) : (f64) -> f64 - // CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @erf(%[[IN1_F64]]) : (f64) -> f64 - // CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> + // CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> %double_result = math.erf %double : vector<2xf64> - // CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> + // CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> return %float_result, %double_result : vector<2xf32>, vector<2xf64> } @@ -459,21 +427,17 @@ func.func @exp_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vec // CHECK-LABEL: func @exp_vec_caller( // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { -// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> -// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> // CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] 
: f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @expf(%[[IN0_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @expf(%[[IN1_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> // CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call @exp(%[[IN0_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @exp(%[[IN1_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> -// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> +// CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> // CHECK: } // CHECK-LABEL: func @exp2_caller @@ -496,21 +460,17 @@ func.func @exp2_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (ve // CHECK-LABEL: func @exp2_vec_caller( // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { -// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> -// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> // CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @exp2f(%[[IN0_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: 
%[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @exp2f(%[[IN1_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> // CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call @exp2(%[[IN0_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @exp2(%[[IN1_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> -// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> +// CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> // CHECK: } // CHECK-LABEL: func @log_caller @@ -533,21 +493,17 @@ func.func @log_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vec // CHECK-LABEL: func @log_vec_caller( // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { -// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> -// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> // CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @logf(%[[IN0_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @logf(%[[IN1_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], 
%[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> // CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call @log(%[[IN0_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @log(%[[IN1_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> -// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> +// CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> // CHECK: } // CHECK-LABEL: func @log2_caller @@ -570,21 +526,17 @@ func.func @log2_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (ve // CHECK-LABEL: func @log2_vec_caller( // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { -// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> -// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> // CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @log2f(%[[IN0_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @log2f(%[[IN1_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> // CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 
from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call @log2(%[[IN0_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @log2(%[[IN1_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> -// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> +// CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> // CHECK: } // CHECK-LABEL: func @log10_caller @@ -607,21 +559,17 @@ func.func @log10_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (v // CHECK-LABEL: func @log10_vec_caller( // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { -// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> -// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> // CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @log10f(%[[IN0_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @log10f(%[[IN1_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> // CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call @log10(%[[IN0_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // 
CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @log10(%[[IN1_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> -// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> +// CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> // CHECK: } // CHECK-LABEL: func @expm1_caller @@ -644,21 +592,17 @@ func.func @expm1_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (v // CHECK-LABEL: func @expm1_vec_caller( // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { -// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> -// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> // CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @expm1f(%[[IN0_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @expm1f(%[[IN1_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> // CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call @expm1(%[[IN0_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @expm1(%[[IN1_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_17:.*]] = 
vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> -// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> +// CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> // CHECK: } func.func @expm1_multidim_vec_caller(%float: vector<2x2xf32>) -> (vector<2x2xf32>) { @@ -667,20 +611,16 @@ func.func @expm1_multidim_vec_caller(%float: vector<2x2xf32>) -> (vector<2x2xf32 } // CHECK-LABEL: func @expm1_multidim_vec_caller( // CHECK-SAME: %[[VAL:.*]]: vector<2x2xf32> -// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf32> // CHECK: %[[IN0_0_F32:.*]] = vector.extract %[[VAL]][0, 0] : f32 from vector<2x2xf32> // CHECK: %[[OUT0_0_F32:.*]] = call @expm1f(%[[IN0_0_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_1:.*]] = vector.insert %[[OUT0_0_F32]], %[[CVF]] [0, 0] : f32 into vector<2x2xf32> // CHECK: %[[IN0_1_F32:.*]] = vector.extract %[[VAL]][0, 1] : f32 from vector<2x2xf32> // CHECK: %[[OUT0_1_F32:.*]] = call @expm1f(%[[IN0_1_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_2:.*]] = vector.insert %[[OUT0_1_F32]], %[[VAL_1]] [0, 1] : f32 into vector<2x2xf32> // CHECK: %[[IN1_0_F32:.*]] = vector.extract %[[VAL]][1, 0] : f32 from vector<2x2xf32> // CHECK: %[[OUT1_0_F32:.*]] = call @expm1f(%[[IN1_0_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_3:.*]] = vector.insert %[[OUT1_0_F32]], %[[VAL_2]] [1, 0] : f32 into vector<2x2xf32> // CHECK: %[[IN1_1_F32:.*]] = vector.extract %[[VAL]][1, 1] : f32 from vector<2x2xf32> // CHECK: %[[OUT1_1_F32:.*]] = call @expm1f(%[[IN1_1_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_4:.*]] = vector.insert %[[OUT1_1_F32]], %[[VAL_3]] [1, 1] : f32 into vector<2x2xf32> -// CHECK: return %[[VAL_4]] : vector<2x2xf32> +// CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_0_F32]], %[[OUT0_1_F32]], %[[OUT1_0_F32]], %[[OUT1_1_F32]] : vector<2x2xf32> +// CHECK: return %[[RES_F32]] : vector<2x2xf32> // CHECK: } // 
CHECK-LABEL: func @fma_caller( @@ -704,29 +644,25 @@ func.func @fma_vec_caller(%float_a: vector<2xf32>, %float_b: vector<2xf32>, %flo // CHECK-SAME: %[[VAL_0A:.*]]: vector<2xf32>, %[[VAL_0B:.*]]: vector<2xf32>, %[[VAL_0C:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1A:.*]]: vector<2xf64>, %[[VAL_1B:.*]]: vector<2xf64>, %[[VAL_1C:.*]]: vector<2xf64> // CHECK-SAME: ) -> (vector<2xf32>, vector<2xf64>) { -// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> -// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> // CHECK: %[[IN0_F32A:.*]] = vector.extract %[[VAL_0A]][0] : f32 from vector<2xf32> // CHECK: %[[IN0_F32B:.*]] = vector.extract %[[VAL_0B]][0] : f32 from vector<2xf32> // CHECK: %[[IN0_F32C:.*]] = vector.extract %[[VAL_0C]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @fmaf(%[[IN0_F32A]], %[[IN0_F32B]], %[[IN0_F32C]]) : (f32, f32, f32) -> f32 -// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32A:.*]] = vector.extract %[[VAL_0A]][1] : f32 from vector<2xf32> // CHECK: %[[IN1_F32B:.*]] = vector.extract %[[VAL_0B]][1] : f32 from vector<2xf32> // CHECK: %[[IN1_F32C:.*]] = vector.extract %[[VAL_0C]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @fmaf(%[[IN1_F32A]], %[[IN1_F32B]], %[[IN1_F32C]]) : (f32, f32, f32) -> f32 -// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> // CHECK: %[[IN0_F64A:.*]] = vector.extract %[[VAL_1A]][0] : f64 from vector<2xf64> // CHECK: %[[IN0_F64B:.*]] = vector.extract %[[VAL_1B]][0] : f64 from vector<2xf64> // CHECK: %[[IN0_F64C:.*]] = vector.extract %[[VAL_1C]][0] : f64 from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call @fma(%[[IN0_F64A]], %[[IN0_F64B]], %[[IN0_F64C]]) : (f64, f64, f64) -> f64 -// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] 
[0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64A:.*]] = vector.extract %[[VAL_1A]][1] : f64 from vector<2xf64> // CHECK: %[[IN1_F64B:.*]] = vector.extract %[[VAL_1B]][1] : f64 from vector<2xf64> // CHECK: %[[IN1_F64C:.*]] = vector.extract %[[VAL_1C]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @fma(%[[IN1_F64A]], %[[IN1_F64B]], %[[IN1_F64C]]) : (f64, f64, f64) -> f64 -// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> -// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> +// CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> // CHECK: } // CHECK-LABEL: func @round_caller @@ -814,23 +750,19 @@ func.func @sin_caller(%float: f32, %double: f64) -> (f32, f64) { // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { func.func @round_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { - // CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> - // CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> // CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @roundf(%[[IN0_F32]]) : (f32) -> f32 - // CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @roundf(%[[IN1_F32]]) : (f32) -> f32 - // CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> + // CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> %float_result = math.round %float : vector<2xf32> // CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> // 
CHECK: %[[OUT0_F64:.*]] = call @round(%[[IN0_F64]]) : (f64) -> f64 - // CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @round(%[[IN1_F64]]) : (f64) -> f64 - // CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> + // CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> %double_result = math.round %double : vector<2xf64> - // CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> + // CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> return %float_result, %double_result : vector<2xf32>, vector<2xf64> } @@ -838,23 +770,19 @@ func.func @round_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (v // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { func.func @roundeven_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { - // CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> - // CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> // CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @roundevenf(%[[IN0_F32]]) : (f32) -> f32 - // CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @roundevenf(%[[IN1_F32]]) : (f32) -> f32 - // CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> + // CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> %float_result = math.roundeven %float : vector<2xf32> // CHECK: %[[IN0_F64:.*]] = vector.extract 
%[[VAL_1]][0] : f64 from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call @roundeven(%[[IN0_F64]]) : (f64) -> f64 - // CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @roundeven(%[[IN1_F64]]) : (f64) -> f64 - // CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> + // CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> %double_result = math.roundeven %double : vector<2xf64> - // CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> + // CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> return %float_result, %double_result : vector<2xf32>, vector<2xf64> } @@ -862,23 +790,19 @@ func.func @roundeven_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) - // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { func.func @trunc_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { - // CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> - // CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> // CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @truncf(%[[IN0_F32]]) : (f32) -> f32 - // CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @truncf(%[[IN1_F32]]) : (f32) -> f32 - // CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> + // CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> %float_result = math.trunc %float : vector<2xf32> // CHECK: 
%[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call @trunc(%[[IN0_F64]]) : (f64) -> f64 - // CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @trunc(%[[IN1_F64]]) : (f64) -> f64 - // CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> + // CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> %double_result = math.trunc %double : vector<2xf64> - // CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> + // CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> return %float_result, %double_result : vector<2xf32>, vector<2xf64> } @@ -907,21 +831,17 @@ func.func @tan_caller(%float: f32, %double: f64, %half: f16, %bfloat: bf16) -> ( // CHECK-LABEL: func @tan_vec_caller( // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { -// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> -// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> // CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @tanf(%[[IN0_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @tanf(%[[IN1_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> // CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call 
@tan(%[[IN0_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @tan(%[[IN1_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> -// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> +// CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> // CHECK: } func.func @tan_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { %float_result = math.tan %float : vector<2xf32> @@ -985,21 +905,17 @@ func.func @sqrt_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (ve // CHECK-LABEL: func @sqrt_vec_caller( // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { -// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> -// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> // CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @sqrtf(%[[IN0_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @sqrtf(%[[IN1_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> // CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call @sqrt(%[[IN0_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_14:.*]] = 
vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @sqrt(%[[IN1_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> -// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> +// CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> // CHECK: } // CHECK-LABEL: func @rsqrt_caller @@ -1022,21 +938,17 @@ func.func @rsqrt_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (v // CHECK-LABEL: func @rsqrt_vec_caller( // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { -// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> -// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> // CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @rsqrtf(%[[IN0_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @rsqrtf(%[[IN1_F32]]) : (f32) -> f32 -// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> // CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call @rsqrt(%[[IN0_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call 
@rsqrt(%[[IN1_F64]]) : (f64) -> f64 -// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> -// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> +// CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> // CHECK: } // CHECK-LABEL: func @powf_caller( @@ -1060,23 +972,19 @@ func.func @powf_vec_caller(%float_a: vector<2xf32>, %float_b: vector<2xf32>, %do // CHECK-SAME: %[[VAL_0A:.*]]: vector<2xf32>, %[[VAL_0B:.*]]: vector<2xf32>, // CHECK-SAME: %[[VAL_1A:.*]]: vector<2xf64>, %[[VAL_1B:.*]]: vector<2xf64> // CHECK-SAME: ) -> (vector<2xf32>, vector<2xf64>) { -// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> -// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> // CHECK: %[[IN0_F32A:.*]] = vector.extract %[[VAL_0A]][0] : f32 from vector<2xf32> // CHECK: %[[IN0_F32B:.*]] = vector.extract %[[VAL_0B]][0] : f32 from vector<2xf32> // CHECK: %[[OUT0_F32:.*]] = call @powf(%[[IN0_F32A]], %[[IN0_F32B]]) : (f32, f32) -> f32 -// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> // CHECK: %[[IN1_F32A:.*]] = vector.extract %[[VAL_0A]][1] : f32 from vector<2xf32> // CHECK: %[[IN1_F32B:.*]] = vector.extract %[[VAL_0B]][1] : f32 from vector<2xf32> // CHECK: %[[OUT1_F32:.*]] = call @powf(%[[IN1_F32A]], %[[IN1_F32B]]) : (f32, f32) -> f32 -// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[RES_F32:.*]] = vector.from_elements %[[OUT0_F32]], %[[OUT1_F32]] : vector<2xf32> // CHECK: %[[IN0_F64A:.*]] = vector.extract %[[VAL_1A]][0] : f64 from vector<2xf64> // CHECK: %[[IN0_F64B:.*]] = vector.extract %[[VAL_1B]][0] : f64 from vector<2xf64> // CHECK: %[[OUT0_F64:.*]] = call @pow(%[[IN0_F64A]], %[[IN0_F64B]]) : (f64, f64) -> f64 -// CHECK: %[[VAL_14:.*]] = vector.insert 
%[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> // CHECK: %[[IN1_F64A:.*]] = vector.extract %[[VAL_1A]][1] : f64 from vector<2xf64> // CHECK: %[[IN1_F64B:.*]] = vector.extract %[[VAL_1B]][1] : f64 from vector<2xf64> // CHECK: %[[OUT1_F64:.*]] = call @pow(%[[IN1_F64A]], %[[IN1_F64B]]) : (f64, f64) -> f64 -// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> -// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: %[[RES_F64:.*]] = vector.from_elements %[[OUT0_F64]], %[[OUT1_F64]] : vector<2xf64> +// CHECK: return %[[RES_F32]], %[[RES_F64]] : vector<2xf32>, vector<2xf64> // CHECK: } diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir index a06a9f67d54dc..1d9817f83f695 100644 --- a/mlir/test/Dialect/Vector/canonicalize.mlir +++ b/mlir/test/Dialect/Vector/canonicalize.mlir @@ -566,40 +566,40 @@ func.func @insert_extract_transpose_2d( // ----- // CHECK-LABEL: insert_extract_chain -// CHECK-SAME: %[[V234:[a-zA-Z0-9]*]]: vector<2x3x4xf32> +// CHECK-SAME: %[[V334:[a-zA-Z0-9]*]]: vector<3x3x4xf32> // CHECK-SAME: %[[V34:[a-zA-Z0-9]*]]: vector<3x4xf32> // CHECK-SAME: %[[V4:[a-zA-Z0-9]*]]: vector<4xf32> -func.func @insert_extract_chain(%v234: vector<2x3x4xf32>, %v34: vector<3x4xf32>, %v4: vector<4xf32>) +func.func @insert_extract_chain(%v334: vector<3x3x4xf32>, %v34: vector<3x4xf32>, %v4: vector<4xf32>) -> (vector<4xf32>, vector<4xf32>, vector<3x4xf32>, vector<3x4xf32>) { // CHECK-NEXT: %[[A34:.*]] = vector.insert - %A34 = vector.insert %v34, %v234[0]: vector<3x4xf32> into vector<2x3x4xf32> + %A34 = vector.insert %v34, %v334[0]: vector<3x4xf32> into vector<3x3x4xf32> // CHECK-NEXT: %[[B34:.*]] = vector.insert - %B34 = vector.insert %v34, %A34[1]: vector<3x4xf32> into vector<2x3x4xf32> + %B34 = vector.insert %v34, %A34[1]: vector<3x4xf32> into vector<3x3x4xf32> // CHECK-NEXT: %[[A4:.*]] = vector.insert - %A4 = vector.insert %v4, %B34[1, 0]: vector<4xf32> 
into vector<2x3x4xf32> + %A4 = vector.insert %v4, %B34[1, 0]: vector<4xf32> into vector<3x3x4xf32> // CHECK-NEXT: %[[B4:.*]] = vector.insert - %B4 = vector.insert %v4, %A4[1, 1]: vector<4xf32> into vector<2x3x4xf32> + %B4 = vector.insert %v4, %A4[1, 1]: vector<4xf32> into vector<3x3x4xf32> // Case 2.a. [1, 1] == insertpos ([1, 1]) // Match %A4 insertionpos and fold to its source(i.e. %V4). - %r0 = vector.extract %B4[1, 1]: vector<4xf32> from vector<2x3x4xf32> + %r0 = vector.extract %B4[1, 1]: vector<4xf32> from vector<3x3x4xf32> // Case 3.a. insertpos ([1]) is a prefix of [1, 0]. // Traverse %B34 to its source(i.e. %V34@[*0*]). // CHECK-NEXT: %[[R1:.*]] = vector.extract %[[V34]][0] - %r1 = vector.extract %B34[1, 0]: vector<4xf32> from vector<2x3x4xf32> + %r1 = vector.extract %B34[1, 0]: vector<4xf32> from vector<3x3x4xf32> // Case 4. [1] is a prefix of insertpos ([1, 1]). // Cannot traverse %B4. // CHECK-NEXT: %[[R2:.*]] = vector.extract %[[B4]][1] - %r2 = vector.extract %B4[1]: vector<3x4xf32> from vector<2x3x4xf32> + %r2 = vector.extract %B4[1]: vector<3x4xf32> from vector<3x3x4xf32> // Case 5. [0] is disjoint from insertpos ([1, 1]). // Traverse %B4 to its dest(i.e. %A4@[0]). // Traverse %A4 to its dest(i.e. %B34@[0]). // Traverse %B34 to its dest(i.e. %A34@[0]). // Match %A34 insertionpos and fold to its source(i.e. %V34). 
- %r3 = vector.extract %B4[0]: vector<3x4xf32> from vector<2x3x4xf32> + %r3 = vector.extract %B4[0]: vector<3x4xf32> from vector<3x3x4xf32> // CHECK: return %[[V4]], %[[R1]], %[[R2]], %[[V34]] return %r0, %r1, %r2, %r3: @@ -946,8 +946,8 @@ func.func @insert_fold_same_rank(%v: vector<2x2xf32>) -> vector<2x2xf32> { // CHECK-LABEL: func @insert_no_fold_scalar_to_0d( // CHECK-SAME: %[[v:.*]]: vector) -// CHECK: %[[extract:.*]] = vector.insert %{{.*}}, %[[v]] [] : f32 into vector -// CHECK: return %[[extract]] +// CHECK: %[[cst:.*]] = arith.constant dense<0.000000e+00> : vector +// CHECK: return %[[cst]] func.func @insert_no_fold_scalar_to_0d(%v: vector) -> vector { %cst = arith.constant 0.000000e+00 : f32 %0 = vector.insert %cst, %v [] : f32 into vector @@ -2320,6 +2320,44 @@ func.func @insert_2d_constant() -> (vector<2x3xi32>, vector<2x3xi32>, vector<2x3 // ----- +// CHECK-LABEL: func.func @fully_insert_scalar_to_vector( +// CHECK-SAME: %[[ARG0:.+]]: vector<2xi64>) +// CHECK: %[[CST10:.+]] = arith.constant 10 : i64 +// CHECK: %[[CST20:.+]] = arith.constant 20 : i64 +// CHECK: %[[RES:.+]] = vector.from_elements %[[CST10]], %[[CST20]] : vector<2xi64> +// CHECK-NEXT: return %[[RES]] +func.func @fully_insert_scalar_to_vector(%arg0 : vector<2xi64>) -> vector<2xi64> { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %e0 = arith.constant 10 : i64 + %e1 = arith.constant 20 : i64 + %v1 = vector.insert %e0, %arg0[%c0] : i64 into vector<2xi64> + %v2 = vector.insert %e1, %v1[%c1] : i64 into vector<2xi64> + return %v2 : vector<2xi64> +} + +// ----- + +// CHECK-LABEL: func.func @fully_insert_vector_to_vector( +// CHECK-SAME: %[[ARG0:.+]]: vector<2x2xi64> +// CHECK: %[[CST1:.+]] = arith.constant 1 : i64 +// CHECK: %[[CST2:.+]] = arith.constant 2 : i64 +// CHECK: %[[CST3:.+]] = arith.constant 3 : i64 +// CHECK: %[[CST4:.+]] = arith.constant 4 : i64 +// CHECK: %[[RES:.+]] = vector.from_elements %[[CST1]], %[[CST2]], %[[CST3]], %[[CST4]] : vector<2x2xi64> +// 
CHECK-NEXT: return %[[RES]] +func.func @fully_insert_vector_to_vector(%arg0 : vector<2x2xi64>) -> vector<2x2xi64> { + %cv0 = arith.constant dense<[1, 2]> : vector<2xi64> + %cv1 = arith.constant dense<[3, 4]> : vector<2xi64> + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %v1 = vector.insert %cv0, %arg0[%c0] : vector<2xi64> into vector<2x2xi64> + %v2 = vector.insert %cv1, %v1[%c1] : vector<2xi64> into vector<2x2xi64> + return %v2 : vector<2x2xi64> +} + +// ----- + // CHECK-LABEL: func.func @insert_2d_splat_constant // CHECK-DAG: %[[ACST:.*]] = arith.constant dense<0> : vector<2x3xi32> // CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[99, 0, 0\], \[0, 0, 0\]\]}}> : vector<2x3xi32> diff --git a/mlir/test/Dialect/Vector/vector-gather-lowering.mlir b/mlir/test/Dialect/Vector/vector-gather-lowering.mlir index 5be267c1be984..246aeeb5cfaf6 100644 --- a/mlir/test/Dialect/Vector/vector-gather-lowering.mlir +++ b/mlir/test/Dialect/Vector/vector-gather-lowering.mlir @@ -198,7 +198,7 @@ func.func @gather_memref_non_unit_stride_read_more_than_1_element(%base: memref< // CANON-NOT: scf.if // CANON: tensor.extract // CANON: tensor.extract -// CANON: [[FINAL:%.+]] = vector.insert %{{.+}}, %{{.+}} [1] : f32 into vector<2xf32> +// CANON: [[FINAL:%.+]] = vector.from_elements %{{.+}}, %{{.+}} : vector<2xf32> // CANON-NEXT: return [[FINAL]] : vector<2xf32> func.func @gather_tensor_1d_all_set(%base: tensor, %v: vector<2xindex>, %pass_thru: vector<2xf32>) -> vector<2xf32> { %mask = arith.constant dense : vector<2xi1> diff --git a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir index 38771f2593449..9e83af35c3161 100644 --- a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir +++ b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir @@ -1191,7 +1191,7 @@ func.func @vector_insertelement_1d_broadcast(%laneid: index, %pos: index) -> (ve // CHECK-PROP: %[[VEC:.*]] = "some_def" // CHECK-PROP: %[[VAL:.*]] = 
"another_def" // CHECK-PROP: gpu.yield %[[VEC]], %[[VAL]] -// CHECK-PROP: vector.insert %[[W]]#1, %[[W]]#0 [] : f32 into vector +// CHECK-PROP: vector.splat %[[W]]#1 : vector func.func @vector_insertelement_0d(%laneid: index) -> (vector) { %r = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector) { %0 = "some_def"() : () -> (vector)